diff --git "a/run-2024-07-14T08:43:36+00:00.log" "b/run-2024-07-14T08:43:36+00:00.log" --- "a/run-2024-07-14T08:43:36+00:00.log" +++ "b/run-2024-07-14T08:43:36+00:00.log" @@ -5872,4 +5872,1169 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 31%|███▏ | 116220/371472 [9:14:57<19:19:09, 3.67it/s] 31%|███▏ | 116221/371472 [9:14:58<19:38:52, 3.61it/s] 31%|███▏ | 116222/371472 [9:14:58<19:11:05, 3.70it/s] 31%|███▏ | 116223/371472 [9:14:58<19:26:40, 3.65it/s] 31%|███▏ | 116224/371472 [9:14:58<20:18:00, 3.49it/s] 31%|███▏ | 116225/371472 [9:14:59<21:00:19, 3.38it/s] 31%|███▏ | 116226/371472 [9:14:59<20:42:39, 3.42it/s] 31%|███▏ | 116227/371472 [9:14:59<20:40:49, 3.43it/s] 31%|███▏ | 116228/371472 [9:15:00<21:24:54, 3.31it/s] 31%|███▏ | 116229/371472 [9:15:00<22:27:53, 3.16it/s] 31%|███▏ | 116230/371472 [9:15:00<21:58:04, 3.23it/s] 31%|███▏ | 116231/371472 [9:15:01<21:59:38, 3.22it/s] 31%|███▏ | 116232/371472 [9:15:01<21:51:05, 3.24it/s] 31%|███▏ | 116233/371472 [9:15:01<21:33:36, 3.29it/s] 31%|███▏ | 116234/371472 [9:15:02<22:38:46, 3.13it/s] 31%|███▏ | 116235/371472 [9:15:02<21:56:03, 3.23it/s] 31%|███▏ | 116236/371472 [9:15:02<21:58:22, 3.23it/s] 31%|███▏ | 116237/371472 [9:15:02<22:14:16, 3.19it/s] 31%|███▏ | 116238/371472 [9:15:03<21:30:35, 3.30it/s] 31%|███▏ | 116239/371472 [9:15:03<20:12:35, 3.51it/s] 31%|███▏ | 116240/371472 [9:15:03<23:43:34, 2.99it/s] {'loss': 3.1461, 'learning_rate': 7.18707578271456e-07, 'epoch': 5.01} 31%|███▏ | 116240/371472 [9:15:03<23:43:34, 2.99it/s] 31%|███▏ | 116241/371472 [9:15:04<21:55:15, 3.23it/s] 31%|███▏ | 116242/371472 [9:15:04<21:19:03, 3.33it/s] 31%|███▏ | 116243/371472 [9:15:04<21:40:07, 3.27it/s] 31%|███▏ | 116244/371472 [9:15:05<20:47:50, 3.41it/s] 31%|███▏ | 116245/371472 [9:15:05<21:06:47, 3.36it/s] 31%|███▏ | 116246/371472 [9:15:05<21:09:03, 3.35it/s] 31%|███▏ | 116247/371472 [9:15:05<21:02:19, 3.37it/s] 31%|███▏ | 116248/371472 [9:15:06<20:27:45, 3.46it/s] 31%|███▏ | 116249/371472 [9:15:06<20:20:22, 3.49it/s] 31%|███▏ | 116250/371472 [9:15:06<19:59:56, 3.54it/s] 31%|███▏ | 116251/371472 [9:15:07<19:25:30, 3.65it/s] 31%|███▏ | 116252/371472 [9:15:07<20:04:44, 3.53it/s] 31%|███▏ | 116253/371472 [9:15:07<20:00:23, 3.54it/s] 31%|███▏ | 116254/371472 [9:15:07<21:04:41, 3.36it/s] 31%|███▏ | 116255/371472 [9:15:08<20:20:56, 3.48it/s] 31%|███▏ | 116256/371472 [9:15:08<20:31:50, 3.45it/s] 31%|███▏ | 116257/371472 [9:15:08<20:53:50, 3.39it/s] 31%|███▏ | 116258/371472 [9:15:09<20:53:59, 3.39it/s] 31%|███▏ | 116259/371472 [9:15:09<20:47:46, 3.41it/s] 31%|███▏ | 116260/371472 [9:15:09<20:51:34, 3.40it/s] {'loss': 3.3035, 'learning_rate': 7.186590962959771e-07, 'epoch': 5.01} 31%|███▏ | 116260/371472 [9:15:09<20:51:34, 3.40it/s] 31%|███▏ | 116261/371472 [9:15:09<20:02:55, 3.54it/s] 31%|███▏ | 116262/371472 [9:15:10<20:35:09, 3.44it/s] 31%|███▏ | 116263/371472 [9:15:10<19:56:53, 3.55it/s] 31%|███▏ | 116264/371472 [9:15:10<19:12:56, 3.69it/s] 31%|███▏ | 116265/371472 [9:15:11<19:08:01, 3.71it/s] 31%|███▏ | 116266/371472 [9:15:11<19:21:38, 3.66it/s] 31%|███▏ | 116267/371472 [9:15:11<19:17:02, 3.68it/s] 31%|███▏ | 116268/371472 [9:15:11<19:00:57, 3.73it/s] 31%|███▏ | 116269/371472 [9:15:12<19:46:22, 3.59it/s] 31%|███▏ | 116270/371472 [9:15:12<22:02:42, 3.22it/s] 31%|███▏ | 116271/371472 [9:15:12<22:20:10, 3.17it/s] 31%|███▏ | 116272/371472 [9:15:13<22:12:49, 3.19it/s] 31%|███▏ | 116273/371472 [9:15:13<21:47:29, 3.25it/s] 31%|███▏ | 116274/371472 [9:15:13<21:27:13, 3.30it/s] 31%|███▏ | 116275/371472 [9:15:14<21:14:08, 3.34it/s] 31%|███▏ | 116276/371472 [9:15:14<22:39:33, 3.13it/s] 31%|███▏ | 116277/371472 [9:15:14<21:01:36, 3.37it/s] 31%|███▏ | 116278/371472 [9:15:14<19:53:15, 3.56it/s] 31%|███▏ | 116279/371472 [9:15:15<20:11:40, 3.51it/s] 31%|███▏ | 116280/371472 [9:15:15<20:50:38, 3.40it/s] {'loss': 3.0551, 'learning_rate': 7.186106143204982e-07, 'epoch': 5.01} - 31%|███▏ | 116280/371472 [9:15:15<20:50:38, 3.40it/s] 31%|███▏ | 116281/371472 [9:15:15<21:04:43, 3.36it/s] 31%|███▏ | 116282/371472 [9:15:16<20:24:16, 3.47it/s] 31%|███▏ | 116283/371472 [9:15:16<20:48:44, 3.41it/s] 31%|███▏ | 116284/371472 [9:15:16<20:07:56, 3.52it/s] 31%|███▏ | 116285/371472 [9:15:17<21:02:57, 3.37it/s] 31%|███▏ | 116286/371472 [9:15:17<20:37:59, 3.44it/s] 31%|███▏ | 116287/371472 [9:15:17<22:36:51, 3.13it/s] 31%|███▏ | 116288/371472 [9:15:17<22:31:30, 3.15it/s] 31%|███▏ | 116289/371472 [9:15:18<22:08:13, 3.20it/s] 31%|███▏ | 116290/371472 [9:15:18<21:12:58, 3.34it/s] 31%|███▏ | 116291/371472 [9:15:18<21:30:25, 3.30it/s] 31%|███▏ | 116292/371472 [9:15:19<21:13:08, 3.34it/s] \ No newline at end of file + 31%|███▏ | 116280/371472 [9:15:15<20:50:38, 3.40it/s] 31%|███▏ | 116281/371472 [9:15:15<21:04:43, 3.36it/s] 31%|███▏ | 116282/371472 [9:15:16<20:24:16, 3.47it/s] 31%|███▏ | 116283/371472 [9:15:16<20:48:44, 3.41it/s] 31%|███▏ | 116284/371472 [9:15:16<20:07:56, 3.52it/s] 31%|███▏ | 116285/371472 [9:15:17<21:02:57, 3.37it/s] 31%|███▏ | 116286/371472 [9:15:17<20:37:59, 3.44it/s] 31%|███▏ | 116287/371472 [9:15:17<22:36:51, 3.13it/s] 31%|███▏ | 116288/371472 [9:15:17<22:31:30, 3.15it/s] 31%|███▏ | 116289/371472 [9:15:18<22:08:13, 3.20it/s] 31%|███▏ | 116290/371472 [9:15:18<21:12:58, 3.34it/s] 31%|███▏ | 116291/371472 [9:15:18<21:30:25, 3.30it/s] 31%|███▏ | 116292/371472 [9:15:19<21:13:08, 3.34it/s] 31%|███▏ | 116293/371472 [9:15:19<24:47:30, 2.86it/s] 31%|███▏ | 116294/371472 [9:15:19<24:11:50, 2.93it/s] 31%|███▏ | 116295/371472 [9:15:20<22:20:33, 3.17it/s] 31%|███▏ | 116296/371472 [9:15:20<21:55:38, 3.23it/s] 31%|███▏ | 116297/371472 [9:15:20<20:47:35, 3.41it/s] 31%|███▏ | 116298/371472 [9:15:21<20:58:14, 3.38it/s] 31%|███▏ | 116299/371472 [9:15:21<21:56:57, 3.23it/s] 31%|███▏ | 116300/371472 [9:15:21<22:43:02, 3.12it/s] {'loss': 3.3047, 'learning_rate': 7.185621323450192e-07, 'epoch': 5.01} + 31%|███▏ | 116300/371472 [9:15:21<22:43:02, 3.12it/s] 31%|███▏ | 116301/371472 [9:15:22<21:44:08, 3.26it/s] 31%|███▏ | 116302/371472 [9:15:22<20:17:15, 3.49it/s] 31%|███▏ | 116303/371472 [9:15:22<19:13:39, 3.69it/s] 31%|███▏ | 116304/371472 [9:15:22<18:53:12, 3.75it/s] 31%|███▏ | 116305/371472 [9:15:23<19:07:42, 3.71it/s] 31%|███▏ | 116306/371472 [9:15:23<19:15:14, 3.68it/s] 31%|███▏ | 116307/371472 [9:15:23<19:50:26, 3.57it/s] 31%|███▏ | 116308/371472 [9:15:23<19:20:38, 3.66it/s] 31%|███▏ | 116309/371472 [9:15:24<19:28:25, 3.64it/s] 31%|███▏ | 116310/371472 [9:15:24<19:17:38, 3.67it/s] 31%|███▏ | 116311/371472 [9:15:24<18:59:37, 3.73it/s] 31%|███▏ | 116312/371472 [9:15:24<19:14:00, 3.69it/s] 31%|███▏ | 116313/371472 [9:15:25<19:27:32, 3.64it/s] 31%|███▏ | 116314/371472 [9:15:25<19:40:46, 3.60it/s] 31%|███▏ | 116315/371472 [9:15:25<18:51:48, 3.76it/s] 31%|███▏ | 116316/371472 [9:15:26<18:54:38, 3.75it/s] 31%|███▏ | 116317/371472 [9:15:26<18:23:37, 3.85it/s] 31%|███▏ | 116318/371472 [9:15:26<17:59:52, 3.94it/s] 31%|███▏ | 116319/371472 [9:15:26<18:03:01, 3.93it/s] 31%|███▏ | 116320/371472 [9:15:27<19:51:06, 3.57it/s] {'loss': 3.2376, 'learning_rate': 7.185136503695403e-07, 'epoch': 5.01} + 31%|███▏ | 116320/371472 [9:15:27<19:51:06, 3.57it/s] 31%|███▏ | 116321/371472 [9:15:27<20:14:30, 3.50it/s] 31%|███▏ | 116322/371472 [9:15:27<20:05:08, 3.53it/s] 31%|███▏ | 116323/371472 [9:15:27<19:43:27, 3.59it/s] 31%|███▏ | 116324/371472 [9:15:28<19:20:08, 3.67it/s] 31%|███▏ | 116325/371472 [9:15:28<19:03:42, 3.72it/s] 31%|███▏ | 116326/371472 [9:15:28<19:19:33, 3.67it/s] 31%|███▏ | 116327/371472 [9:15:29<19:49:30, 3.57it/s] 31%|███▏ | 116328/371472 [9:15:29<21:01:36, 3.37it/s] 31%|███▏ | 116329/371472 [9:15:29<20:22:48, 3.48it/s] 31%|███▏ | 116330/371472 [9:15:29<20:19:58, 3.49it/s] 31%|███▏ | 116331/371472 [9:15:30<19:55:33, 3.56it/s] 31%|███▏ | 116332/371472 [9:15:30<20:11:35, 3.51it/s] 31%|███▏ | 116333/371472 [9:15:30<20:03:13, 3.53it/s] 31%|███▏ | 116334/371472 [9:15:31<20:16:17, 3.50it/s] 31%|███▏ | 116335/371472 [9:15:31<19:53:17, 3.56it/s] 31%|███▏ | 116336/371472 [9:15:31<20:21:19, 3.48it/s] 31%|███▏ | 116337/371472 [9:15:31<19:54:53, 3.56it/s] 31%|███▏ | 116338/371472 [9:15:32<20:16:55, 3.49it/s] 31%|███▏ | 116339/371472 [9:15:32<20:20:39, 3.48it/s] 31%|███▏ | 116340/371472 [9:15:32<21:23:20, 3.31it/s] {'loss': 3.1463, 'learning_rate': 7.184651683940614e-07, 'epoch': 5.01} + 31%|███▏ | 116340/371472 [9:15:32<21:23:20, 3.31it/s] 31%|███▏ | 116341/371472 [9:15:33<22:10:05, 3.20it/s] 31%|███▏ | 116342/371472 [9:15:33<23:20:38, 3.04it/s] 31%|███▏ | 116343/371472 [9:15:33<23:10:50, 3.06it/s] 31%|███▏ | 116344/371472 [9:15:34<23:07:30, 3.06it/s] 31%|███▏ | 116345/371472 [9:15:34<23:17:01, 3.04it/s] 31%|███▏ | 116346/371472 [9:15:34<23:06:29, 3.07it/s] 31%|███▏ | 116347/371472 [9:15:35<22:57:55, 3.09it/s] 31%|███▏ | 116348/371472 [9:15:35<23:03:31, 3.07it/s] 31%|███▏ | 116349/371472 [9:15:35<23:52:32, 2.97it/s] 31%|███▏ | 116350/371472 [9:15:36<25:15:33, 2.81it/s] 31%|███▏ | 116351/371472 [9:15:36<24:20:57, 2.91it/s] 31%|███▏ | 116352/371472 [9:15:36<24:02:46, 2.95it/s] 31%|███▏ | 116353/371472 [9:15:37<23:50:20, 2.97it/s] 31%|███▏ | 116354/371472 [9:15:37<22:04:31, 3.21it/s] 31%|███▏ | 116355/371472 [9:15:37<22:17:47, 3.18it/s] 31%|███▏ | 116356/371472 [9:15:38<23:49:37, 2.97it/s] 31%|███▏ | 116357/371472 [9:15:38<22:48:32, 3.11it/s] 31%|███▏ | 116358/371472 [9:15:38<22:49:05, 3.11it/s] 31%|███▏ | 116359/371472 [9:15:39<21:52:24, 3.24it/s] 31%|███▏ | 116360/371472 [9:15:39<21:51:45, 3.24it/s] {'loss': 3.3451, 'learning_rate': 7.184166864185826e-07, 'epoch': 5.01} + 31%|███▏ | 116360/371472 [9:15:39<21:51:45, 3.24it/s] 31%|███▏ | 116361/371472 [9:15:39<20:57:25, 3.38it/s] 31%|███▏ | 116362/371472 [9:15:39<22:20:17, 3.17it/s] 31%|███▏ | 116363/371472 [9:15:40<21:42:28, 3.26it/s] 31%|███▏ | 116364/371472 [9:15:40<20:56:31, 3.38it/s] 31%|███▏ | 116365/371472 [9:15:40<20:00:25, 3.54it/s] 31%|███▏ | 116366/371472 [9:15:41<19:33:55, 3.62it/s] 31%|███▏ | 116367/371472 [9:15:41<19:30:22, 3.63it/s] 31%|███▏ | 116368/371472 [9:15:41<20:36:17, 3.44it/s] 31%|███▏ | 116369/371472 [9:15:41<19:55:56, 3.56it/s] 31%|███▏ | 116370/371472 [9:15:42<20:27:46, 3.46it/s] 31%|███▏ | 116371/371472 [9:15:42<19:59:58, 3.54it/s] 31%|███▏ | 116372/371472 [9:15:42<20:23:08, 3.48it/s] 31%|███▏ | 116373/371472 [9:15:43<20:24:32, 3.47it/s] 31%|███▏ | 116374/371472 [9:15:43<21:30:32, 3.29it/s] 31%|███▏ | 116375/371472 [9:15:43<20:26:19, 3.47it/s] 31%|███▏ | 116376/371472 [9:15:43<20:30:07, 3.46it/s] 31%|███▏ | 116377/371472 [9:15:44<20:25:09, 3.47it/s] 31%|███▏ | 116378/371472 [9:15:44<19:58:55, 3.55it/s] 31%|███▏ | 116379/371472 [9:15:44<20:19:11, 3.49it/s] 31%|███▏ | 116380/371472 [9:15:45<24:19:24, 2.91it/s] {'loss': 3.3481, 'learning_rate': 7.183682044431037e-07, 'epoch': 5.01} + 31%|███▏ | 116380/371472 [9:15:45<24:19:24, 2.91it/s] 31%|███▏ | 116381/371472 [9:15:45<22:55:28, 3.09it/s] 31%|███▏ | 116382/371472 [9:15:45<21:53:43, 3.24it/s] 31%|███▏ | 116383/371472 [9:15:46<20:49:46, 3.40it/s] 31%|███▏ | 116384/371472 [9:15:46<20:20:35, 3.48it/s] 31%|███▏ | 116385/371472 [9:15:46<20:08:51, 3.52it/s] 31%|███▏ | 116386/371472 [9:15:47<22:34:40, 3.14it/s] 31%|███▏ | 116387/371472 [9:15:47<21:47:23, 3.25it/s] 31%|███▏ | 116388/371472 [9:15:47<20:30:53, 3.45it/s] 31%|███▏ | 116389/371472 [9:15:47<20:03:43, 3.53it/s] 31%|███▏ | 116390/371472 [9:15:48<23:45:43, 2.98it/s] 31%|███▏ | 116391/371472 [9:15:48<22:24:44, 3.16it/s] 31%|███▏ | 116392/371472 [9:15:48<21:15:08, 3.33it/s] 31%|███▏ | 116393/371472 [9:15:49<20:21:31, 3.48it/s] 31%|███▏ | 116394/371472 [9:15:49<20:19:26, 3.49it/s] 31%|███▏ | 116395/371472 [9:15:49<19:17:41, 3.67it/s] 31%|███▏ | 116396/371472 [9:15:49<18:47:19, 3.77it/s] 31%|███▏ | 116397/371472 [9:15:50<19:41:14, 3.60it/s] 31%|███▏ | 116398/371472 [9:15:50<19:46:05, 3.58it/s] 31%|███▏ | 116399/371472 [9:15:50<20:44:17, 3.42it/s] 31%|███▏ | 116400/371472 [9:15:51<20:29:25, 3.46it/s] {'loss': 3.3748, 'learning_rate': 7.183197224676248e-07, 'epoch': 5.01} + 31%|███▏ | 116400/371472 [9:15:51<20:29:25, 3.46it/s] 31%|███▏ | 116401/371472 [9:15:51<21:09:14, 3.35it/s] 31%|███▏ | 116402/371472 [9:15:51<21:48:55, 3.25it/s] 31%|███▏ | 116403/371472 [9:15:51<21:21:21, 3.32it/s] 31%|███▏ | 116404/371472 [9:15:52<21:09:13, 3.35it/s] 31%|███▏ | 116405/371472 [9:15:52<20:23:55, 3.47it/s] 31%|███▏ | 116406/371472 [9:15:52<20:59:18, 3.38it/s] 31%|███▏ | 116407/371472 [9:15:53<20:20:34, 3.48it/s] 31%|███▏ | 116408/371472 [9:15:53<20:43:14, 3.42it/s] 31%|███▏ | 116409/371472 [9:15:53<22:33:06, 3.14it/s] 31%|███▏ | 116410/371472 [9:15:54<21:39:40, 3.27it/s] 31%|███▏ | 116411/371472 [9:15:54<22:23:08, 3.16it/s] 31%|███▏ | 116412/371472 [9:15:54<21:43:39, 3.26it/s] 31%|███▏ | 116413/371472 [9:15:54<21:08:00, 3.35it/s] 31%|███▏ | 116414/371472 [9:15:55<20:15:57, 3.50it/s] 31%|███▏ | 116415/371472 [9:15:55<20:41:19, 3.42it/s] 31%|███▏ | 116416/371472 [9:15:55<21:03:52, 3.36it/s] 31%|███▏ | 116417/371472 [9:15:56<21:35:52, 3.28it/s] 31%|███▏ | 116418/371472 [9:15:56<20:27:19, 3.46it/s] 31%|███▏ | 116419/371472 [9:15:56<20:22:15, 3.48it/s] 31%|███▏ | 116420/371472 [9:15:56<19:56:16, 3.55it/s] {'loss': 3.399, 'learning_rate': 7.182712404921459e-07, 'epoch': 5.01} + 31%|███▏ | 116420/371472 [9:15:56<19:56:16, 3.55it/s] 31%|███▏ | 116421/371472 [9:15:57<19:18:44, 3.67it/s] 31%|███▏ | 116422/371472 [9:15:57<20:07:44, 3.52it/s] 31%|███▏ | 116423/371472 [9:15:57<19:47:22, 3.58it/s] 31%|███▏ | 116424/371472 [9:15:58<19:41:29, 3.60it/s] 31%|███▏ | 116425/371472 [9:15:58<19:51:23, 3.57it/s] 31%|███▏ | 116426/371472 [9:15:58<20:06:58, 3.52it/s] 31%|███▏ | 116427/371472 [9:15:59<21:22:30, 3.31it/s] 31%|███▏ | 116428/371472 [9:15:59<20:24:46, 3.47it/s] 31%|███▏ | 116429/371472 [9:15:59<19:56:46, 3.55it/s] 31%|███▏ | 116430/371472 [9:15:59<19:40:38, 3.60it/s] 31%|███▏ | 116431/371472 [9:16:00<19:00:00, 3.73it/s] 31%|███▏ | 116432/371472 [9:16:00<19:34:01, 3.62it/s] 31%|███▏ | 116433/371472 [9:16:00<20:22:56, 3.48it/s] 31%|███▏ | 116434/371472 [9:16:00<19:32:26, 3.63it/s] 31%|███▏ | 116435/371472 [9:16:01<20:17:20, 3.49it/s] 31%|███▏ | 116436/371472 [9:16:01<19:59:56, 3.54it/s] 31%|███▏ | 116437/371472 [9:16:01<19:51:47, 3.57it/s] 31%|███▏ | 116438/371472 [9:16:02<20:28:05, 3.46it/s] 31%|███▏ | 116439/371472 [9:16:02<19:54:52, 3.56it/s] 31%|███▏ | 116440/371472 [9:16:02<22:00:20, 3.22it/s] {'loss': 3.2716, 'learning_rate': 7.182227585166669e-07, 'epoch': 5.02} + 31%|███▏ | 116440/371472 [9:16:02<22:00:20, 3.22it/s] 31%|███▏ | 116441/371472 [9:16:02<21:01:14, 3.37it/s] 31%|███▏ | 116442/371472 [9:16:03<20:11:22, 3.51it/s] 31%|███▏ | 116443/371472 [9:16:03<21:03:43, 3.36it/s] 31%|███▏ | 116444/371472 [9:16:03<20:48:11, 3.41it/s] 31%|███▏ | 116445/371472 [9:16:04<20:25:14, 3.47it/s] 31%|███▏ | 116446/371472 [9:16:04<20:18:56, 3.49it/s] 31%|███▏ | 116447/371472 [9:16:04<21:14:04, 3.34it/s] 31%|███▏ | 116448/371472 [9:16:05<21:20:40, 3.32it/s] 31%|███▏ | 116449/371472 [9:16:05<20:51:38, 3.40it/s] 31%|███▏ | 116450/371472 [9:16:05<20:12:04, 3.51it/s] 31%|███▏ | 116451/371472 [9:16:06<23:04:35, 3.07it/s] 31%|███▏ | 116452/371472 [9:16:06<22:33:11, 3.14it/s] 31%|███▏ | 116453/371472 [9:16:06<21:12:49, 3.34it/s] 31%|███▏ | 116454/371472 [9:16:06<20:59:35, 3.37it/s] 31%|███▏ | 116455/371472 [9:16:07<20:16:07, 3.49it/s] 31%|███▏ | 116456/371472 [9:16:07<21:05:30, 3.36it/s] 31%|███▏ | 116457/371472 [9:16:07<22:52:48, 3.10it/s] 31%|███▏ | 116458/371472 [9:16:08<22:02:42, 3.21it/s] 31%|███▏ | 116459/371472 [9:16:08<21:02:43, 3.37it/s] 31%|███▏ | 116460/371472 [9:16:08<20:52:40, 3.39it/s] {'loss': 3.3551, 'learning_rate': 7.181742765411881e-07, 'epoch': 5.02} + 31%|███▏ | 116460/371472 [9:16:08<20:52:40, 3.39it/s] 31%|███▏ | 116461/371472 [9:16:08<19:40:43, 3.60it/s] 31%|███▏ | 116462/371472 [9:16:09<19:52:14, 3.56it/s] 31%|███▏ | 116463/371472 [9:16:09<20:17:09, 3.49it/s] 31%|███▏ | 116464/371472 [9:16:09<19:59:35, 3.54it/s] 31%|███▏ | 116465/371472 [9:16:10<20:28:45, 3.46it/s] 31%|███▏ | 116466/371472 [9:16:10<20:35:08, 3.44it/s] 31%|███▏ | 116467/371472 [9:16:10<20:34:29, 3.44it/s] 31%|███▏ | 116468/371472 [9:16:10<20:53:39, 3.39it/s] 31%|███▏ | 116469/371472 [9:16:11<20:11:39, 3.51it/s] 31%|███▏ | 116470/371472 [9:16:11<19:15:35, 3.68it/s] 31%|███▏ | 116471/371472 [9:16:11<18:53:12, 3.75it/s] 31%|███▏ | 116472/371472 [9:16:12<20:04:24, 3.53it/s] 31%|███▏ | 116473/371472 [9:16:12<20:02:37, 3.53it/s] 31%|███▏ | 116474/371472 [9:16:12<19:59:47, 3.54it/s] 31%|███▏ | 116475/371472 [9:16:12<19:36:19, 3.61it/s] 31%|███▏ | 116476/371472 [9:16:13<20:36:29, 3.44it/s] 31%|███▏ | 116477/371472 [9:16:13<20:19:55, 3.48it/s] 31%|███▏ | 116478/371472 [9:16:13<20:18:46, 3.49it/s] 31%|███▏ | 116479/371472 [9:16:14<19:40:40, 3.60it/s] 31%|███▏ | 116480/371472 [9:16:14<20:03:38, 3.53it/s] {'loss': 3.2017, 'learning_rate': 7.181257945657092e-07, 'epoch': 5.02} + 31%|███▏ | 116480/371472 [9:16:14<20:03:38, 3.53it/s] 31%|███▏ | 116481/371472 [9:16:14<19:44:57, 3.59it/s] 31%|███▏ | 116482/371472 [9:16:14<20:00:28, 3.54it/s] 31%|███▏ | 116483/371472 [9:16:15<20:21:51, 3.48it/s] 31%|███▏ | 116484/371472 [9:16:15<19:44:55, 3.59it/s] 31%|███▏ | 116485/371472 [9:16:15<21:11:48, 3.34it/s] 31%|███▏ | 116486/371472 [9:16:16<21:14:06, 3.34it/s] 31%|███▏ | 116487/371472 [9:16:16<23:16:54, 3.04it/s] 31%|███▏ | 116488/371472 [9:16:16<23:40:19, 2.99it/s] 31%|███▏ | 116489/371472 [9:16:17<22:38:53, 3.13it/s] 31%|███▏ | 116490/371472 [9:16:17<21:24:26, 3.31it/s] 31%|███▏ | 116491/371472 [9:16:17<22:25:40, 3.16it/s] 31%|███▏ | 116492/371472 [9:16:18<23:30:49, 3.01it/s] 31%|███▏ | 116493/371472 [9:16:18<22:04:35, 3.21it/s] 31%|███▏ | 116494/371472 [9:16:18<21:26:33, 3.30it/s] 31%|███▏ | 116495/371472 [9:16:18<22:33:19, 3.14it/s] 31%|███▏ | 116496/371472 [9:16:19<22:00:53, 3.22it/s] 31%|███▏ | 116497/371472 [9:16:19<21:55:44, 3.23it/s] 31%|███▏ | 116498/371472 [9:16:19<20:54:27, 3.39it/s] 31%|███▏ | 116499/371472 [9:16:20<20:55:00, 3.39it/s] 31%|███▏ | 116500/371472 [9:16:20<20:30:12, 3.45it/s] {'loss': 3.2237, 'learning_rate': 7.180773125902303e-07, 'epoch': 5.02} + 31%|███▏ | 116500/371472 [9:16:20<20:30:12, 3.45it/s] 31%|███▏ | 116501/371472 [9:16:20<20:15:08, 3.50it/s] 31%|███▏ | 116502/371472 [9:16:21<20:37:09, 3.43it/s] 31%|███▏ | 116503/371472 [9:16:21<21:13:32, 3.34it/s] 31%|███▏ | 116504/371472 [9:16:21<20:45:49, 3.41it/s] 31%|███▏ | 116505/371472 [9:16:21<21:48:14, 3.25it/s] 31%|███▏ | 116506/371472 [9:16:22<21:43:10, 3.26it/s] 31%|███▏ | 116507/371472 [9:16:22<22:19:59, 3.17it/s] 31%|███▏ | 116508/371472 [9:16:22<24:14:08, 2.92it/s] 31%|███▏ | 116509/371472 [9:16:23<23:12:52, 3.05it/s] 31%|███▏ | 116510/371472 [9:16:23<21:43:51, 3.26it/s] 31%|███▏ | 116511/371472 [9:16:23<20:41:09, 3.42it/s] 31%|███▏ | 116512/371472 [9:16:24<21:33:22, 3.29it/s] 31%|███▏ | 116513/371472 [9:16:24<20:30:53, 3.45it/s] 31%|███▏ | 116514/371472 [9:16:24<20:27:20, 3.46it/s] 31%|███▏ | 116515/371472 [9:16:24<20:36:20, 3.44it/s] 31%|███▏ | 116516/371472 [9:16:25<23:24:56, 3.02it/s] 31%|███▏ | 116517/371472 [9:16:25<22:23:06, 3.16it/s] 31%|███▏ | 116518/371472 [9:16:25<21:31:39, 3.29it/s] 31%|███▏ | 116519/371472 [9:16:26<20:21:01, 3.48it/s] 31%|███▏ | 116520/371472 [9:16:26<20:01:55, 3.54it/s] {'loss': 3.333, 'learning_rate': 7.180288306147514e-07, 'epoch': 5.02} + 31%|███▏ | 116520/371472 [9:16:26<20:01:55, 3.54it/s] 31%|███▏ | 116521/371472 [9:16:26<19:31:15, 3.63it/s] 31%|███▏ | 116522/371472 [9:16:27<19:50:37, 3.57it/s] 31%|███▏ | 116523/371472 [9:16:27<19:27:55, 3.64it/s] 31%|███▏ | 116524/371472 [9:16:27<20:25:21, 3.47it/s] 31%|███▏ | 116525/371472 [9:16:27<20:08:49, 3.52it/s] 31%|███▏ | 116526/371472 [9:16:28<19:54:07, 3.56it/s] 31%|███▏ | 116527/371472 [9:16:28<19:32:19, 3.62it/s] 31%|███▏ | 116528/371472 [9:16:28<19:54:27, 3.56it/s] 31%|███▏ | 116529/371472 [9:16:29<21:37:01, 3.28it/s] 31%|███▏ | 116530/371472 [9:16:29<21:42:57, 3.26it/s] 31%|███▏ | 116531/371472 [9:16:29<21:03:57, 3.36it/s] 31%|███▏ | 116532/371472 [9:16:29<21:08:26, 3.35it/s] 31%|███▏ | 116533/371472 [9:16:30<20:04:34, 3.53it/s] 31%|███▏ | 116534/371472 [9:16:30<19:56:56, 3.55it/s] 31%|███▏ | 116535/371472 [9:16:30<20:21:10, 3.48it/s] 31%|███▏ | 116536/371472 [9:16:31<20:12:33, 3.50it/s] 31%|███▏ | 116537/371472 [9:16:31<20:21:56, 3.48it/s] 31%|███▏ | 116538/371472 [9:16:31<20:36:55, 3.44it/s] 31%|███▏ | 116539/371472 [9:16:32<22:09:48, 3.20it/s] 31%|███▏ | 116540/371472 [9:16:32<22:08:21, 3.20it/s] {'loss': 3.3354, 'learning_rate': 7.179803486392726e-07, 'epoch': 5.02} + 31%|███▏ | 116540/371472 [9:16:32<22:08:21, 3.20it/s] 31%|███▏ | 116541/371472 [9:16:32<22:39:07, 3.13it/s] 31%|███▏ | 116542/371472 [9:16:32<22:01:59, 3.21it/s] 31%|███▏ | 116543/371472 [9:16:33<22:20:04, 3.17it/s] 31%|███▏ | 116544/371472 [9:16:33<21:27:17, 3.30it/s] 31%|███▏ | 116545/371472 [9:16:33<20:36:40, 3.44it/s] 31%|███▏ | 116546/371472 [9:16:34<20:37:22, 3.43it/s] 31%|███▏ | 116547/371472 [9:16:34<20:10:41, 3.51it/s] 31%|███▏ | 116548/371472 [9:16:34<21:09:00, 3.35it/s] 31%|███▏ | 116549/371472 [9:16:35<22:02:43, 3.21it/s] 31%|███▏ | 116550/371472 [9:16:35<23:26:34, 3.02it/s] 31%|███▏ | 116551/371472 [9:16:35<22:19:58, 3.17it/s] 31%|███▏ | 116552/371472 [9:16:35<21:20:30, 3.32it/s] 31%|███▏ | 116553/371472 [9:16:36<21:02:05, 3.37it/s] 31%|███▏ | 116554/371472 [9:16:36<21:06:30, 3.35it/s] 31%|███▏ | 116555/371472 [9:16:36<21:24:50, 3.31it/s] 31%|███▏ | 116556/371472 [9:16:37<20:56:05, 3.38it/s] 31%|███▏ | 116557/371472 [9:16:37<21:16:48, 3.33it/s] 31%|███▏ | 116558/371472 [9:16:37<21:40:21, 3.27it/s] 31%|███▏ | 116559/371472 [9:16:38<21:21:53, 3.31it/s] 31%|███▏ | 116560/371472 [9:16:38<21:47:41, 3.25it/s] {'loss': 3.2447, 'learning_rate': 7.179318666637936e-07, 'epoch': 5.02} + 31%|███▏ | 116560/371472 [9:16:38<21:47:41, 3.25it/s] 31%|███▏ | 116561/371472 [9:16:38<21:25:39, 3.30it/s] 31%|███▏ | 116562/371472 [9:16:38<21:06:08, 3.36it/s] 31%|███▏ | 116563/371472 [9:16:39<20:07:38, 3.52it/s] 31%|███▏ | 116564/371472 [9:16:39<21:50:53, 3.24it/s] 31%|███▏ | 116565/371472 [9:16:39<21:16:13, 3.33it/s] 31%|███▏ | 116566/371472 [9:16:40<20:42:36, 3.42it/s] 31%|███▏ | 116567/371472 [9:16:40<20:43:25, 3.42it/s] 31%|███▏ | 116568/371472 [9:16:40<19:53:20, 3.56it/s] 31%|███▏ | 116569/371472 [9:16:40<19:35:28, 3.61it/s] 31%|███▏ | 116570/371472 [9:16:41<20:17:29, 3.49it/s] 31%|███▏ | 116571/371472 [9:16:41<20:58:22, 3.38it/s] 31%|███▏ | 116572/371472 [9:16:41<21:07:04, 3.35it/s] 31%|███▏ | 116573/371472 [9:16:42<21:35:18, 3.28it/s] 31%|███▏ | 116574/371472 [9:16:42<21:27:50, 3.30it/s] 31%|███▏ | 116575/371472 [9:16:42<21:06:40, 3.35it/s] 31%|███▏ | 116576/371472 [9:16:43<21:04:09, 3.36it/s] 31%|███▏ | 116577/371472 [9:16:43<20:22:16, 3.48it/s] 31%|███▏ | 116578/371472 [9:16:43<20:02:41, 3.53it/s] 31%|███▏ | 116579/371472 [9:16:43<19:27:17, 3.64it/s] 31%|███▏ | 116580/371472 [9:16:44<20:39:22, 3.43it/s] {'loss': 3.3293, 'learning_rate': 7.178833846883147e-07, 'epoch': 5.02} + 31%|███▏ | 116580/371472 [9:16:44<20:39:22, 3.43it/s] 31%|███▏ | 116581/371472 [9:16:44<20:54:37, 3.39it/s] 31%|███▏ | 116582/371472 [9:16:44<22:04:42, 3.21it/s] 31%|███▏ | 116583/371472 [9:16:45<21:42:49, 3.26it/s] 31%|███▏ | 116584/371472 [9:16:45<20:58:19, 3.38it/s] 31%|███▏ | 116585/371472 [9:16:45<20:50:32, 3.40it/s] 31%|███▏ | 116586/371472 [9:16:46<20:43:41, 3.42it/s] 31%|███▏ | 116587/371472 [9:16:46<20:31:23, 3.45it/s] 31%|███▏ | 116588/371472 [9:16:46<21:13:14, 3.34it/s] 31%|███▏ | 116589/371472 [9:16:46<20:41:11, 3.42it/s] 31%|███▏ | 116590/371472 [9:16:47<20:16:17, 3.49it/s] 31%|███▏ | 116591/371472 [9:16:47<21:02:56, 3.36it/s] 31%|███▏ | 116592/371472 [9:16:47<21:57:54, 3.22it/s] 31%|███▏ | 116593/371472 [9:16:48<22:04:15, 3.21it/s] 31%|███▏ | 116594/371472 [9:16:48<22:03:34, 3.21it/s] 31%|███▏ | 116595/371472 [9:16:48<23:14:31, 3.05it/s] 31%|███▏ | 116596/371472 [9:16:49<24:42:33, 2.87it/s] 31%|███▏ | 116597/371472 [9:16:49<23:18:12, 3.04it/s] 31%|███▏ | 116598/371472 [9:16:49<22:05:11, 3.21it/s] 31%|███▏ | 116599/371472 [9:16:50<21:21:53, 3.31it/s] 31%|███▏ | 116600/371472 [9:16:50<20:41:33, 3.42it/s] {'loss': 3.2851, 'learning_rate': 7.178349027128358e-07, 'epoch': 5.02} + 31%|███▏ | 116600/371472 [9:16:50<20:41:33, 3.42it/s] 31%|███▏ | 116601/371472 [9:16:50<20:18:40, 3.49it/s] 31%|███▏ | 116602/371472 [9:16:50<21:45:36, 3.25it/s] 31%|███▏ | 116603/371472 [9:16:51<20:56:13, 3.38it/s] 31%|███▏ | 116604/371472 [9:16:51<21:16:43, 3.33it/s] 31%|███▏ | 116605/371472 [9:16:51<21:28:23, 3.30it/s] 31%|███▏ | 116606/371472 [9:16:52<22:34:28, 3.14it/s] 31%|███▏ | 116607/371472 [9:16:52<21:50:32, 3.24it/s] 31%|███▏ | 116608/371472 [9:16:52<21:23:33, 3.31it/s] 31%|███▏ | 116609/371472 [9:16:53<20:33:27, 3.44it/s] 31%|███▏ | 116610/371472 [9:16:53<20:52:33, 3.39it/s] 31%|███▏ | 116611/371472 [9:16:53<20:41:39, 3.42it/s] 31%|███▏ | 116612/371472 [9:16:53<21:20:04, 3.32it/s] 31%|███▏ | 116613/371472 [9:16:54<20:28:08, 3.46it/s] 31%|███▏ | 116614/371472 [9:16:54<20:44:52, 3.41it/s] 31%|███▏ | 116615/371472 [9:16:54<20:23:18, 3.47it/s] 31%|███▏ | 116616/371472 [9:16:55<20:09:50, 3.51it/s] 31%|███▏ | 116617/371472 [9:16:55<19:32:16, 3.62it/s] 31%|███▏ | 116618/371472 [9:16:55<19:12:01, 3.69it/s] 31%|███▏ | 116619/371472 [9:16:55<19:29:06, 3.63it/s] 31%|███▏ | 116620/371472 [9:16:56<19:31:20, 3.63it/s] {'loss': 3.2433, 'learning_rate': 7.177864207373569e-07, 'epoch': 5.02} + 31%|███▏ | 116620/371472 [9:16:56<19:31:20, 3.63it/s] 31%|███▏ | 116621/371472 [9:16:56<19:44:26, 3.59it/s] 31%|███▏ | 116622/371472 [9:16:56<19:44:31, 3.59it/s] 31%|███▏ | 116623/371472 [9:16:56<19:11:38, 3.69it/s] 31%|███▏ | 116624/371472 [9:16:57<19:23:01, 3.65it/s] 31%|███▏ | 116625/371472 [9:16:57<19:04:24, 3.71it/s] 31%|███▏ | 116626/371472 [9:16:57<19:36:28, 3.61it/s] 31%|███▏ | 116627/371472 [9:16:58<20:33:58, 3.44it/s] 31%|███▏ | 116628/371472 [9:16:58<20:23:56, 3.47it/s] 31%|███▏ | 116629/371472 [9:16:58<19:58:42, 3.54it/s] 31%|███▏ | 116630/371472 [9:16:58<20:00:35, 3.54it/s] 31%|███▏ | 116631/371472 [9:16:59<22:02:08, 3.21it/s] 31%|███▏ | 116632/371472 [9:16:59<21:26:24, 3.30it/s] 31%|███▏ | 116633/371472 [9:16:59<21:03:01, 3.36it/s] 31%|███▏ | 116634/371472 [9:17:00<20:35:03, 3.44it/s] 31%|███▏ | 116635/371472 [9:17:00<19:51:05, 3.57it/s] 31%|███▏ | 116636/371472 [9:17:00<20:20:38, 3.48it/s] 31%|███▏ | 116637/371472 [9:17:01<21:56:13, 3.23it/s] 31%|███▏ | 116638/371472 [9:17:01<22:39:40, 3.12it/s] 31%|███▏ | 116639/371472 [9:17:01<22:05:03, 3.21it/s] 31%|███▏ | 116640/371472 [9:17:02<22:38:10, 3.13it/s] {'loss': 3.4275, 'learning_rate': 7.177379387618781e-07, 'epoch': 5.02} + 31%|███▏ | 116640/371472 [9:17:02<22:38:10, 3.13it/s] 31%|███▏ | 116641/371472 [9:17:02<21:32:20, 3.29it/s] 31%|███▏ | 116642/371472 [9:17:02<20:53:09, 3.39it/s] 31%|███▏ | 116643/371472 [9:17:02<22:04:42, 3.21it/s] 31%|███▏ | 116644/371472 [9:17:03<22:29:31, 3.15it/s] 31%|███▏ | 116645/371472 [9:17:03<21:30:38, 3.29it/s] 31%|███▏ | 116646/371472 [9:17:03<20:34:25, 3.44it/s] 31%|███▏ | 116647/371472 [9:17:04<20:08:56, 3.51it/s] 31%|███▏ | 116648/371472 [9:17:04<20:00:44, 3.54it/s] 31%|███▏ | 116649/371472 [9:17:04<20:02:53, 3.53it/s] 31%|███▏ | 116650/371472 [9:17:04<20:28:27, 3.46it/s] 31%|███▏ | 116651/371472 [9:17:05<20:36:40, 3.43it/s] 31%|███▏ | 116652/371472 [9:17:05<20:06:35, 3.52it/s] 31%|███▏ | 116653/371472 [9:17:05<19:55:41, 3.55it/s] 31%|███▏ | 116654/371472 [9:17:06<19:40:33, 3.60it/s] 31%|███▏ | 116655/371472 [9:17:06<19:01:24, 3.72it/s] 31%|███▏ | 116656/371472 [9:17:06<20:02:25, 3.53it/s] 31%|███▏ | 116657/371472 [9:17:06<19:44:23, 3.59it/s] 31%|███▏ | 116658/371472 [9:17:07<20:05:36, 3.52it/s] 31%|███▏ | 116659/371472 [9:17:07<20:07:14, 3.52it/s] 31%|███▏ | 116660/371472 [9:17:07<19:35:41, 3.61it/s] {'loss': 3.2474, 'learning_rate': 7.176894567863992e-07, 'epoch': 5.02} + 31%|███▏ | 116660/371472 [9:17:07<19:35:41, 3.61it/s] 31%|███▏ | 116661/371472 [9:17:08<19:17:47, 3.67it/s] 31%|███▏ | 116662/371472 [9:17:08<19:02:57, 3.72it/s] 31%|███▏ | 116663/371472 [9:17:08<18:49:39, 3.76it/s] 31%|███▏ | 116664/371472 [9:17:08<20:04:19, 3.53it/s] 31%|███▏ | 116665/371472 [9:17:09<20:45:37, 3.41it/s] 31%|███▏ | 116666/371472 [9:17:09<21:40:04, 3.27it/s] 31%|███▏ | 116667/371472 [9:17:09<21:35:21, 3.28it/s] 31%|███▏ | 116668/371472 [9:17:10<20:44:37, 3.41it/s] 31%|███▏ | 116669/371472 [9:17:10<20:39:48, 3.43it/s] 31%|███▏ | 116670/371472 [9:17:10<21:23:31, 3.31it/s] 31%|███▏ | 116671/371472 [9:17:10<21:21:12, 3.31it/s] 31%|███▏ | 116672/371472 [9:17:11<21:54:11, 3.23it/s] 31%|███▏ | 116673/371472 [9:17:11<21:21:28, 3.31it/s] 31%|███▏ | 116674/371472 [9:17:11<21:34:51, 3.28it/s] 31%|███▏ | 116675/371472 [9:17:12<21:31:10, 3.29it/s] 31%|███▏ | 116676/371472 [9:17:12<23:32:16, 3.01it/s] 31%|███▏ | 116677/371472 [9:17:13<24:49:52, 2.85it/s] 31%|███▏ | 116678/371472 [9:17:13<22:47:16, 3.11it/s] 31%|███▏ | 116679/371472 [9:17:13<21:23:14, 3.31it/s] 31%|███▏ | 116680/371472 [9:17:13<20:37:02, 3.43it/s] {'loss': 3.1054, 'learning_rate': 7.176409748109202e-07, 'epoch': 5.03} + 31%|███▏ | 116680/371472 [9:17:13<20:37:02, 3.43it/s] 31%|███▏ | 116681/371472 [9:17:14<21:50:14, 3.24it/s] 31%|███▏ | 116682/371472 [9:17:14<21:29:02, 3.29it/s] 31%|███▏ | 116683/371472 [9:17:14<20:44:27, 3.41it/s] 31%|███▏ | 116684/371472 [9:17:14<20:53:05, 3.39it/s] 31%|███▏ | 116685/371472 [9:17:15<20:48:13, 3.40it/s] 31%|███▏ | 116686/371472 [9:17:15<20:04:44, 3.52it/s] 31%|███▏ | 116687/371472 [9:17:15<19:38:21, 3.60it/s] 31%|███▏ | 116688/371472 [9:17:16<19:16:02, 3.67it/s] 31%|███▏ | 116689/371472 [9:17:16<19:26:08, 3.64it/s] 31%|███▏ | 116690/371472 [9:17:16<19:21:24, 3.66it/s] 31%|███▏ | 116691/371472 [9:17:16<20:18:27, 3.49it/s] 31%|███▏ | 116692/371472 [9:17:17<21:04:42, 3.36it/s] 31%|███▏ | 116693/371472 [9:17:17<20:35:23, 3.44it/s] 31%|███▏ | 116694/371472 [9:17:17<20:06:23, 3.52it/s] 31%|███▏ | 116695/371472 [9:17:18<21:13:01, 3.34it/s] 31%|███▏ | 116696/371472 [9:17:18<21:20:07, 3.32it/s] 31%|███▏ | 116697/371472 [9:17:18<20:33:38, 3.44it/s] 31%|███▏ | 116698/371472 [9:17:19<22:00:49, 3.21it/s] 31%|███▏ | 116699/371472 [9:17:19<20:53:34, 3.39it/s] 31%|███▏ | 116700/371472 [9:17:19<20:28:10, 3.46it/s] {'loss': 3.2145, 'learning_rate': 7.175924928354414e-07, 'epoch': 5.03} + 31%|███▏ | 116700/371472 [9:17:19<20:28:10, 3.46it/s] 31%|███▏ | 116701/371472 [9:17:19<20:20:07, 3.48it/s] 31%|███▏ | 116702/371472 [9:17:20<19:42:01, 3.59it/s] 31%|███▏ | 116703/371472 [9:17:20<21:13:03, 3.34it/s] 31%|███▏ | 116704/371472 [9:17:20<21:18:59, 3.32it/s] 31%|███▏ | 116705/371472 [9:17:21<20:03:04, 3.53it/s] 31%|███▏ | 116706/371472 [9:17:21<20:22:33, 3.47it/s] 31%|███▏ | 116707/371472 [9:17:21<19:25:52, 3.64it/s] 31%|███▏ | 116708/371472 [9:17:21<19:43:24, 3.59it/s] 31%|███▏ | 116709/371472 [9:17:22<19:34:39, 3.61it/s] 31%|███▏ | 116710/371472 [9:17:22<19:09:39, 3.69it/s] 31%|███▏ | 116711/371472 [9:17:22<19:32:32, 3.62it/s] 31%|███▏ | 116712/371472 [9:17:22<20:10:23, 3.51it/s] 31%|███▏ | 116713/371472 [9:17:23<20:31:52, 3.45it/s] 31%|███▏ | 116714/371472 [9:17:23<19:48:10, 3.57it/s] 31%|███▏ | 116715/371472 [9:17:23<20:01:21, 3.53it/s] 31%|███▏ | 116716/371472 [9:17:24<19:27:03, 3.64it/s] 31%|███▏ | 116717/371472 [9:17:24<19:13:48, 3.68it/s] 31%|███▏ | 116718/371472 [9:17:24<18:44:12, 3.78it/s] 31%|███▏ | 116719/371472 [9:17:24<19:49:34, 3.57it/s] 31%|███▏ | 116720/371472 [9:17:25<20:27:35, 3.46it/s] {'loss': 3.3423, 'learning_rate': 7.175440108599625e-07, 'epoch': 5.03} + 31%|███▏ | 116720/371472 [9:17:25<20:27:35, 3.46it/s] 31%|███▏ | 116721/371472 [9:17:25<20:50:25, 3.40it/s] 31%|███▏ | 116722/371472 [9:17:25<21:08:06, 3.35it/s] 31%|███▏ | 116723/371472 [9:17:26<20:32:22, 3.45it/s] 31%|███▏ | 116724/371472 [9:17:26<21:46:02, 3.25it/s] 31%|███▏ | 116725/371472 [9:17:26<22:39:48, 3.12it/s] 31%|███▏ | 116726/371472 [9:17:27<23:17:30, 3.04it/s] 31%|███▏ | 116727/371472 [9:17:27<22:22:21, 3.16it/s] 31%|███▏ | 116728/371472 [9:17:27<22:08:02, 3.20it/s] 31%|███▏ | 116729/371472 [9:17:28<20:56:32, 3.38it/s] 31%|███▏ | 116730/371472 [9:17:28<20:19:49, 3.48it/s] 31%|███▏ | 116731/371472 [9:17:28<19:40:47, 3.60it/s] 31%|███▏ | 116732/371472 [9:17:28<19:16:36, 3.67it/s] 31%|███▏ | 116733/371472 [9:17:29<19:44:03, 3.59it/s] 31%|███▏ | 116734/371472 [9:17:29<19:17:45, 3.67it/s] 31%|███▏ | 116735/371472 [9:17:29<21:21:09, 3.31it/s] 31%|███▏ | 116736/371472 [9:17:29<20:41:16, 3.42it/s] 31%|███▏ | 116737/371472 [9:17:30<19:58:20, 3.54it/s] 31%|███▏ | 116738/371472 [9:17:30<20:04:17, 3.53it/s] 31%|███▏ | 116739/371472 [9:17:30<19:39:15, 3.60it/s] 31%|███▏ | 116740/371472 [9:17:31<19:25:54, 3.64it/s] {'loss': 3.2361, 'learning_rate': 7.174955288844835e-07, 'epoch': 5.03} + 31%|███▏ | 116740/371472 [9:17:31<19:25:54, 3.64it/s] 31%|███▏ | 116741/371472 [9:17:31<19:21:49, 3.65it/s] 31%|███▏ | 116742/371472 [9:17:31<18:55:05, 3.74it/s] 31%|███▏ | 116743/371472 [9:17:31<18:41:24, 3.79it/s] 31%|███▏ | 116744/371472 [9:17:32<18:22:08, 3.85it/s] 31%|███▏ | 116745/371472 [9:17:32<18:07:46, 3.90it/s] 31%|███▏ | 116746/371472 [9:17:32<18:22:43, 3.85it/s] 31%|███▏ | 116747/371472 [9:17:32<18:28:12, 3.83it/s] 31%|███▏ | 116748/371472 [9:17:33<18:45:34, 3.77it/s] 31%|███▏ | 116749/371472 [9:17:33<18:21:50, 3.85it/s] 31%|███▏ | 116750/371472 [9:17:33<18:39:27, 3.79it/s] 31%|███▏ | 116751/371472 [9:17:33<18:32:14, 3.82it/s] 31%|███▏ | 116752/371472 [9:17:34<20:01:47, 3.53it/s] 31%|███▏ | 116753/371472 [9:17:34<20:03:59, 3.53it/s] 31%|███▏ | 116754/371472 [9:17:34<19:37:44, 3.60it/s] 31%|███▏ | 116755/371472 [9:17:35<18:51:32, 3.75it/s] 31%|███▏ | 116756/371472 [9:17:35<20:14:29, 3.50it/s] 31%|███▏ | 116757/371472 [9:17:35<21:39:32, 3.27it/s] 31%|███▏ | 116758/371472 [9:17:35<20:33:49, 3.44it/s] 31%|███▏ | 116759/371472 [9:17:36<19:40:03, 3.60it/s] 31%|███▏ | 116760/371472 [9:17:36<21:07:29, 3.35it/s] {'loss': 3.3735, 'learning_rate': 7.174470469090047e-07, 'epoch': 5.03} + 31%|███▏ | 116760/371472 [9:17:36<21:07:29, 3.35it/s] 31%|███▏ | 116761/371472 [9:17:36<21:13:38, 3.33it/s] 31%|███▏ | 116762/371472 [9:17:37<20:48:47, 3.40it/s] 31%|███▏ | 116763/371472 [9:17:37<20:31:25, 3.45it/s] 31%|███▏ | 116764/371472 [9:17:37<19:55:49, 3.55it/s] 31%|███▏ | 116765/371472 [9:17:38<22:29:55, 3.14it/s] 31%|███▏ | 116766/371472 [9:17:38<21:51:19, 3.24it/s] 31%|███▏ | 116767/371472 [9:17:38<20:45:25, 3.41it/s] 31%|███▏ | 116768/371472 [9:17:38<19:57:54, 3.54it/s] 31%|███▏ | 116769/371472 [9:17:39<19:17:23, 3.67it/s] 31%|███▏ | 116770/371472 [9:17:39<19:04:30, 3.71it/s] 31%|███▏ | 116771/371472 [9:17:39<18:38:58, 3.79it/s] 31%|███▏ | 116772/371472 [9:17:39<18:38:37, 3.79it/s] 31%|███▏ | 116773/371472 [9:17:40<18:40:07, 3.79it/s] 31%|███▏ | 116774/371472 [9:17:40<19:29:07, 3.63it/s] 31%|███▏ | 116775/371472 [9:17:40<19:06:16, 3.70it/s] 31%|███▏ | 116776/371472 [9:17:41<21:01:09, 3.37it/s] 31%|███▏ | 116777/371472 [9:17:41<20:13:27, 3.50it/s] 31%|███▏ | 116778/371472 [9:17:41<19:44:34, 3.58it/s] 31%|███▏ | 116779/371472 [9:17:41<20:08:24, 3.51it/s] 31%|███▏ | 116780/371472 [9:17:42<20:02:46, 3.53it/s] {'loss': 3.3189, 'learning_rate': 7.173985649335259e-07, 'epoch': 5.03} + 31%|███▏ | 116780/371472 [9:17:42<20:02:46, 3.53it/s] 31%|███▏ | 116781/371472 [9:17:42<20:48:23, 3.40it/s] 31%|███▏ | 116782/371472 [9:17:42<19:58:43, 3.54it/s] 31%|███▏ | 116783/371472 [9:17:43<20:06:30, 3.52it/s] 31%|███▏ | 116784/371472 [9:17:43<20:21:22, 3.48it/s] 31%|███▏ | 116785/371472 [9:17:43<19:42:05, 3.59it/s] 31%|███▏ | 116786/371472 [9:17:43<19:37:39, 3.60it/s] 31%|███▏ | 116787/371472 [9:17:44<20:06:10, 3.52it/s] 31%|███▏ | 116788/371472 [9:17:44<21:03:55, 3.36it/s] 31%|███▏ | 116789/371472 [9:17:44<20:15:29, 3.49it/s] 31%|���██▏ | 116790/371472 [9:17:45<19:44:50, 3.58it/s] 31%|███▏ | 116791/371472 [9:17:45<19:17:35, 3.67it/s] 31%|███▏ | 116792/371472 [9:17:45<18:59:56, 3.72it/s] 31%|███▏ | 116793/371472 [9:17:45<20:02:03, 3.53it/s] 31%|███▏ | 116794/371472 [9:17:46<19:58:23, 3.54it/s] 31%|███▏ | 116795/371472 [9:17:46<20:41:41, 3.42it/s] 31%|███▏ | 116796/371472 [9:17:46<20:31:15, 3.45it/s] 31%|███▏ | 116797/371472 [9:17:47<19:45:15, 3.58it/s] 31%|███▏ | 116798/371472 [9:17:47<19:12:22, 3.68it/s] 31%|███▏ | 116799/371472 [9:17:47<19:44:15, 3.58it/s] 31%|███▏ | 116800/371472 [9:17:47<20:03:50, 3.53it/s] {'loss': 3.1903, 'learning_rate': 7.17350082958047e-07, 'epoch': 5.03} + 31%|███▏ | 116800/371472 [9:17:47<20:03:50, 3.53it/s] 31%|███▏ | 116801/371472 [9:17:48<20:59:32, 3.37it/s] 31%|███▏ | 116802/371472 [9:17:48<19:52:56, 3.56it/s] 31%|███▏ | 116803/371472 [9:17:48<21:12:23, 3.34it/s] 31%|███▏ | 116804/371472 [9:17:49<20:21:21, 3.48it/s] 31%|███▏ | 116805/371472 [9:17:49<19:21:04, 3.66it/s] 31%|███▏ | 116806/371472 [9:17:49<19:46:56, 3.58it/s] 31%|███▏ | 116807/371472 [9:17:49<19:52:46, 3.56it/s] 31%|███▏ | 116808/371472 [9:17:50<20:38:41, 3.43it/s] 31%|███▏ | 116809/371472 [9:17:50<19:56:41, 3.55it/s] 31%|███▏ | 116810/371472 [9:17:50<19:52:22, 3.56it/s] 31%|███▏ | 116811/371472 [9:17:51<20:41:23, 3.42it/s] 31%|███▏ | 116812/371472 [9:17:51<21:22:46, 3.31it/s] 31%|███▏ | 116813/371472 [9:17:51<20:17:45, 3.49it/s] 31%|███▏ | 116814/371472 [9:17:51<21:16:46, 3.32it/s] 31%|███▏ | 116815/371472 [9:17:52<21:12:55, 3.33it/s] 31%|███▏ | 116816/371472 [9:17:52<21:38:36, 3.27it/s] 31%|███▏ | 116817/371472 [9:17:52<22:19:07, 3.17it/s] 31%|███▏ | 116818/371472 [9:17:53<22:44:47, 3.11it/s] 31%|███▏ | 116819/371472 [9:17:53<22:04:26, 3.20it/s] 31%|███▏ | 116820/371472 [9:17:53<21:05:45, 3.35it/s] {'loss': 3.4081, 'learning_rate': 7.173016009825679e-07, 'epoch': 5.03} + 31%|███▏ | 116820/371472 [9:17:53<21:05:45, 3.35it/s] 31%|███▏ | 116821/371472 [9:17:54<20:45:24, 3.41it/s] 31%|███▏ | 116822/371472 [9:17:54<20:11:28, 3.50it/s] 31%|███▏ | 116823/371472 [9:17:54<20:30:07, 3.45it/s] 31%|███▏ | 116824/371472 [9:17:55<21:46:55, 3.25it/s] 31%|███▏ | 116825/371472 [9:17:55<21:05:54, 3.35it/s] 31%|███▏ | 116826/371472 [9:17:55<20:46:46, 3.40it/s] 31%|███▏ | 116827/371472 [9:17:55<19:59:43, 3.54it/s] 31%|███▏ | 116828/371472 [9:17:56<19:51:25, 3.56it/s] 31%|███▏ | 116829/371472 [9:17:56<20:01:03, 3.53it/s] 31%|███▏ | 116830/371472 [9:17:56<19:37:02, 3.61it/s] 31%|███▏ | 116831/371472 [9:17:56<19:48:28, 3.57it/s] 31%|███▏ | 116832/371472 [9:17:57<21:25:15, 3.30it/s] 31%|███▏ | 116833/371472 [9:17:57<22:00:07, 3.21it/s] 31%|███▏ | 116834/371472 [9:17:57<22:14:58, 3.18it/s] 31%|███▏ | 116835/371472 [9:17:58<23:53:29, 2.96it/s] 31%|███▏ | 116836/371472 [9:17:58<24:04:50, 2.94it/s] 31%|███▏ | 116837/371472 [9:17:58<22:23:43, 3.16it/s] 31%|███▏ | 116838/371472 [9:17:59<20:55:49, 3.38it/s] 31%|███▏ | 116839/371472 [9:17:59<20:18:11, 3.48it/s] 31%|███▏ | 116840/371472 [9:17:59<19:31:27, 3.62it/s] {'loss': 3.3324, 'learning_rate': 7.172531190070891e-07, 'epoch': 5.03} + 31%|███▏ | 116840/371472 [9:17:59<19:31:27, 3.62it/s] 31%|███▏ | 116841/371472 [9:18:00<20:13:19, 3.50it/s] 31%|███▏ | 116842/371472 [9:18:00<20:09:02, 3.51it/s] 31%|███▏ | 116843/371472 [9:18:00<20:40:12, 3.42it/s] 31%|███▏ | 116844/371472 [9:18:00<20:15:07, 3.49it/s] 31%|███▏ | 116845/371472 [9:18:01<19:44:04, 3.58it/s] 31%|███▏ | 116846/371472 [9:18:01<19:38:59, 3.60it/s] 31%|███▏ | 116847/371472 [9:18:01<19:35:50, 3.61it/s] 31%|███▏ | 116848/371472 [9:18:01<19:35:20, 3.61it/s] 31%|███▏ | 116849/371472 [9:18:02<19:51:05, 3.56it/s] 31%|███▏ | 116850/371472 [9:18:02<21:02:20, 3.36it/s] 31%|███▏ | 116851/371472 [9:18:02<20:58:06, 3.37it/s] 31%|███▏ | 116852/371472 [9:18:03<20:44:50, 3.41it/s] 31%|███▏ | 116853/371472 [9:18:03<20:10:18, 3.51it/s] 31%|███▏ | 116854/371472 [9:18:03<19:53:05, 3.56it/s] 31%|███▏ | 116855/371472 [9:18:03<19:11:04, 3.69it/s] 31%|███▏ | 116856/371472 [9:18:04<19:05:19, 3.71it/s] 31%|███▏ | 116857/371472 [9:18:04<19:39:48, 3.60it/s] 31%|███▏ | 116858/371472 [9:18:04<18:58:54, 3.73it/s] 31%|███▏ | 116859/371472 [9:18:05<19:07:20, 3.70it/s] 31%|███▏ | 116860/371472 [9:18:05<20:21:14, 3.47it/s] {'loss': 3.2918, 'learning_rate': 7.172046370316102e-07, 'epoch': 5.03} + 31%|███▏ | 116860/371472 [9:18:05<20:21:14, 3.47it/s] 31%|███▏ | 116861/371472 [9:18:05<20:07:13, 3.52it/s] 31%|███▏ | 116862/371472 [9:18:05<20:35:45, 3.43it/s] 31%|███▏ | 116863/371472 [9:18:06<20:23:53, 3.47it/s] 31%|███▏ | 116864/371472 [9:18:06<19:58:56, 3.54it/s] 31%|███▏ | 116865/371472 [9:18:06<19:14:44, 3.67it/s] 31%|███▏ | 116866/371472 [9:18:07<18:45:52, 3.77it/s] 31%|███▏ | 116867/371472 [9:18:07<19:34:27, 3.61it/s] 31%|███▏ | 116868/371472 [9:18:07<19:22:24, 3.65it/s] 31%|███▏ | 116869/371472 [9:18:07<21:08:31, 3.35it/s] 31%|███▏ | 116870/371472 [9:18:08<20:35:46, 3.43it/s] 31%|███▏ | 116871/371472 [9:18:08<21:27:00, 3.30it/s] 31%|███▏ | 116872/371472 [9:18:08<22:03:53, 3.21it/s] 31%|███▏ | 116873/371472 [9:18:09<20:37:26, 3.43it/s] 31%|███▏ | 116874/371472 [9:18:09<20:04:58, 3.52it/s] 31%|███▏ | 116875/371472 [9:18:09<21:04:00, 3.36it/s] 31%|███▏ | 116876/371472 [9:18:10<21:19:52, 3.32it/s] 31%|███▏ | 116877/371472 [9:18:10<20:37:33, 3.43it/s] 31%|███▏ | 116878/371472 [9:18:10<19:51:35, 3.56it/s] 31%|███▏ | 116879/371472 [9:18:10<19:41:43, 3.59it/s] 31%|███▏ | 116880/371472 [9:18:11<19:25:52, 3.64it/s] {'loss': 3.4805, 'learning_rate': 7.171561550561313e-07, 'epoch': 5.03} + 31%|███▏ | 116880/371472 [9:18:11<19:25:52, 3.64it/s] 31%|███▏ | 116881/371472 [9:18:11<20:04:44, 3.52it/s] 31%|███▏ | 116882/371472 [9:18:11<20:12:41, 3.50it/s] 31%|███▏ | 116883/371472 [9:18:11<19:44:04, 3.58it/s] 31%|███▏ | 116884/371472 [9:18:12<20:07:41, 3.51it/s] 31%|███▏ | 116885/371472 [9:18:12<20:00:09, 3.54it/s] 31%|███▏ | 116886/371472 [9:18:12<20:02:41, 3.53it/s] 31%|███▏ | 116887/371472 [9:18:13<19:29:45, 3.63it/s] 31%|███▏ | 116888/371472 [9:18:13<19:06:22, 3.70it/s] 31%|███▏ | 116889/371472 [9:18:13<19:14:44, 3.67it/s] 31%|███▏ | 116890/371472 [9:18:13<19:15:17, 3.67it/s] 31%|███▏ | 116891/371472 [9:18:14<20:34:58, 3.44it/s] 31%|███▏ | 116892/371472 [9:18:14<19:50:35, 3.56it/s] 31%|███▏ | 116893/371472 [9:18:14<19:18:20, 3.66it/s] 31%|███▏ | 116894/371472 [9:18:14<18:43:13, 3.78it/s] 31%|███▏ | 116895/371472 [9:18:15<18:35:06, 3.80it/s] 31%|███▏ | 116896/371472 [9:18:15<18:49:04, 3.76it/s] 31%|███▏ | 116897/371472 [9:18:15<19:28:55, 3.63it/s] 31%|███▏ | 116898/371472 [9:18:16<19:16:38, 3.67it/s] 31%|███▏ | 116899/371472 [9:18:16<18:50:23, 3.75it/s] 31%|███▏ | 116900/371472 [9:18:16<19:01:20, 3.72it/s] {'loss': 3.1995, 'learning_rate': 7.171076730806524e-07, 'epoch': 5.04} + 31%|███▏ | 116900/371472 [9:18:16<19:01:20, 3.72it/s] 31%|███▏ | 116901/371472 [9:18:16<19:28:51, 3.63it/s] 31%|███▏ | 116902/371472 [9:18:17<18:45:44, 3.77it/s] 31%|███▏ | 116903/371472 [9:18:17<19:05:01, 3.71it/s] 31%|███▏ | 116904/371472 [9:18:17<19:57:18, 3.54it/s] 31%|███▏ | 116905/371472 [9:18:17<19:49:47, 3.57it/s] 31%|███▏ | 116906/371472 [9:18:18<19:29:53, 3.63it/s] 31%|███▏ | 116907/371472 [9:18:18<19:15:51, 3.67it/s] 31%|███▏ | 116908/371472 [9:18:18<19:32:53, 3.62it/s] 31%|███▏ | 116909/371472 [9:18:19<19:59:10, 3.54it/s] 31%|███▏ | 116910/371472 [9:18:19<20:53:26, 3.38it/s] 31%|███▏ | 116911/371472 [9:18:19<21:40:04, 3.26it/s] 31%|███▏ | 116912/371472 [9:18:20<20:31:10, 3.45it/s] 31%|███▏ | 116913/371472 [9:18:20<19:45:04, 3.58it/s] 31%|███▏ | 116914/371472 [9:18:20<19:12:29, 3.68it/s] 31%|███▏ | 116915/371472 [9:18:20<18:52:38, 3.75it/s] 31%|███▏ | 116916/371472 [9:18:21<22:00:35, 3.21it/s] 31%|███▏ | 116917/371472 [9:18:21<21:21:46, 3.31it/s] 31%|███▏ | 116918/371472 [9:18:21<20:48:10, 3.40it/s] 31%|███▏ | 116919/371472 [9:18:22<20:38:24, 3.43it/s] 31%|███▏ | 116920/371472 [9:18:22<21:08:26, 3.34it/s] {'loss': 3.4255, 'learning_rate': 7.170591911051736e-07, 'epoch': 5.04} + 31%|███▏ | 116920/371472 [9:18:22<21:08:26, 3.34it/s] 31%|███▏ | 116921/371472 [9:18:22<20:38:18, 3.43it/s] 31%|███▏ | 116922/371472 [9:18:22<21:18:37, 3.32it/s] 31%|███▏ | 116923/371472 [9:18:23<21:56:56, 3.22it/s] 31%|███▏ | 116924/371472 [9:18:23<21:49:31, 3.24it/s] 31%|███▏ | 116925/371472 [9:18:23<20:43:26, 3.41it/s] 31%|███▏ | 116926/371472 [9:18:24<20:57:58, 3.37it/s] 31%|███▏ | 116927/371472 [9:18:24<20:51:49, 3.39it/s] 31%|███▏ | 116928/371472 [9:18:24<20:07:58, 3.51it/s] 31%|███▏ | 116929/371472 [9:18:24<19:59:40, 3.54it/s] 31%|███▏ | 116930/371472 [9:18:25<20:34:15, 3.44it/s] 31%|███▏ | 116931/371472 [9:18:25<20:57:26, 3.37it/s] 31%|███▏ | 116932/371472 [9:18:25<20:32:04, 3.44it/s] 31%|███▏ | 116933/371472 [9:18:26<21:33:01, 3.28it/s] 31%|███▏ | 116934/371472 [9:18:26<20:19:09, 3.48it/s] 31%|███▏ | 116935/371472 [9:18:26<22:10:44, 3.19it/s] 31%|███▏ | 116936/371472 [9:18:27<20:46:32, 3.40it/s] 31%|███▏ | 116937/371472 [9:18:27<20:57:39, 3.37it/s] 31%|███▏ | 116938/371472 [9:18:27<19:58:04, 3.54it/s] 31%|███▏ | 116939/371472 [9:18:27<19:05:05, 3.70it/s] 31%|███▏ | 116940/371472 [9:18:28<19:50:00, 3.56it/s] {'loss': 3.0694, 'learning_rate': 7.170107091296945e-07, 'epoch': 5.04} + 31%|███▏ | 116940/371472 [9:18:28<19:50:00, 3.56it/s] 31%|███▏ | 116941/371472 [9:18:28<19:40:39, 3.59it/s] 31%|███▏ | 116942/371472 [9:18:28<20:40:30, 3.42it/s] 31%|███▏ | 116943/371472 [9:18:29<20:51:48, 3.39it/s] 31%|███▏ | 116944/371472 [9:18:29<21:19:02, 3.32it/s] 31%|███▏ | 116945/371472 [9:18:29<22:54:18, 3.09it/s] 31%|███▏ | 116946/371472 [9:18:30<21:48:19, 3.24it/s] 31%|███▏ | 116947/371472 [9:18:30<20:40:22, 3.42it/s] 31%|███▏ | 116948/371472 [9:18:30<20:40:42, 3.42it/s] 31%|███▏ | 116949/371472 [9:18:30<20:12:37, 3.50it/s] 31%|███▏ | 116950/371472 [9:18:31<19:43:11, 3.59it/s] 31%|███▏ | 116951/371472 [9:18:31<19:31:09, 3.62it/s] 31%|███▏ | 116952/371472 [9:18:31<19:18:54, 3.66it/s] 31%|███▏ | 116953/371472 [9:18:31<19:15:30, 3.67it/s] 31%|███▏ | 116954/371472 [9:18:32<19:12:23, 3.68it/s] 31%|███▏ | 116955/371472 [9:18:32<19:01:51, 3.71it/s] 31%|███▏ | 116956/371472 [9:18:32<18:53:07, 3.74it/s] 31%|███▏ | 116957/371472 [9:18:33<19:45:58, 3.58it/s] 31%|███▏ | 116958/371472 [9:18:33<19:49:56, 3.56it/s] 31%|███▏ | 116959/371472 [9:18:33<20:07:36, 3.51it/s] 31%|███▏ | 116960/371472 [9:18:33<19:34:59, 3.61it/s] {'loss': 3.4009, 'learning_rate': 7.169622271542157e-07, 'epoch': 5.04} + 31%|███▏ | 116960/371472 [9:18:33<19:34:59, 3.61it/s] 31%|███▏ | 116961/371472 [9:18:34<19:54:16, 3.55it/s] 31%|███▏ | 116962/371472 [9:18:34<19:25:22, 3.64it/s] 31%|███▏ | 116963/371472 [9:18:34<20:01:26, 3.53it/s] 31%|███▏ | 116964/371472 [9:18:34<19:35:33, 3.61it/s] 31%|███▏ | 116965/371472 [9:18:35<19:04:50, 3.71it/s] 31%|███▏ | 116966/371472 [9:18:35<19:57:00, 3.54it/s] 31%|███▏ | 116967/371472 [9:18:35<21:26:56, 3.30it/s] 31%|███▏ | 116968/371472 [9:18:36<20:31:21, 3.44it/s] 31%|███▏ | 116969/371472 [9:18:36<20:22:02, 3.47it/s] 31%|███▏ | 116970/371472 [9:18:36<20:25:45, 3.46it/s] 31%|███▏ | 116971/371472 [9:18:36<19:48:36, 3.57it/s] 31%|███▏ | 116972/371472 [9:18:37<19:27:14, 3.63it/s] 31%|███▏ | 116973/371472 [9:18:37<20:42:02, 3.42it/s] 31%|███▏ | 116974/371472 [9:18:37<20:32:15, 3.44it/s] 31%|███▏ | 116975/371472 [9:18:38<19:17:31, 3.66it/s] 31%|███▏ | 116976/371472 [9:18:38<19:01:26, 3.72it/s] 31%|███▏ | 116977/371472 [9:18:38<18:49:45, 3.75it/s] 31%|███▏ | 116978/371472 [9:18:38<20:32:12, 3.44it/s] 31%|███▏ | 116979/371472 [9:18:39<20:07:47, 3.51it/s] 31%|███▏ | 116980/371472 [9:18:39<19:38:09, 3.60it/s] {'loss': 3.3476, 'learning_rate': 7.169137451787368e-07, 'epoch': 5.04} + 31%|███▏ | 116980/371472 [9:18:39<19:38:09, 3.60it/s] 31%|███▏ | 116981/371472 [9:18:39<20:59:04, 3.37it/s] 31%|███▏ | 116982/371472 [9:18:40<20:30:18, 3.45it/s] 31%|█��█▏ | 116983/371472 [9:18:40<20:55:02, 3.38it/s] 31%|███▏ | 116984/371472 [9:18:40<20:19:51, 3.48it/s] 31%|███▏ | 116985/371472 [9:18:41<21:54:10, 3.23it/s] 31%|███▏ | 116986/371472 [9:18:41<20:46:24, 3.40it/s] 31%|███▏ | 116987/371472 [9:18:41<20:17:05, 3.48it/s] 31%|███▏ | 116988/371472 [9:18:41<20:29:46, 3.45it/s] 31%|███▏ | 116989/371472 [9:18:42<20:21:14, 3.47it/s] 31%|███▏ | 116990/371472 [9:18:42<20:11:03, 3.50it/s] 31%|███▏ | 116991/371472 [9:18:42<20:58:02, 3.37it/s] 31%|███▏ | 116992/371472 [9:18:43<20:08:48, 3.51it/s] 31%|███▏ | 116993/371472 [9:18:43<20:06:51, 3.51it/s] 31%|███▏ | 116994/371472 [9:18:43<19:49:21, 3.57it/s] 31%|███▏ | 116995/371472 [9:18:43<19:07:12, 3.70it/s] 31%|███▏ | 116996/371472 [9:18:44<18:35:56, 3.80it/s] 31%|███▏ | 116997/371472 [9:18:44<18:35:08, 3.80it/s] 31%|███▏ | 116998/371472 [9:18:44<19:06:55, 3.70it/s] 31%|███▏ | 116999/371472 [9:18:44<18:43:48, 3.77it/s] 31%|███▏ | 117000/371472 [9:18:45<18:39:09, 3.79it/s] {'loss': 3.2149, 'learning_rate': 7.16865263203258e-07, 'epoch': 5.04} + 31%|███▏ | 117000/371472 [9:18:45<18:39:09, 3.79it/s] 31%|███▏ | 117001/371472 [9:18:45<18:39:53, 3.79it/s] 31%|███▏ | 117002/371472 [9:18:45<18:54:11, 3.74it/s] 31%|███▏ | 117003/371472 [9:18:45<18:59:26, 3.72it/s] 31%|███▏ | 117004/371472 [9:18:46<18:54:48, 3.74it/s] 31%|███▏ | 117005/371472 [9:18:46<22:04:00, 3.20it/s] 31%|███▏ | 117006/371472 [9:18:46<20:40:05, 3.42it/s] 31%|███▏ | 117007/371472 [9:18:47<20:12:13, 3.50it/s] 31%|███▏ | 117008/371472 [9:18:47<19:37:19, 3.60it/s] 31%|███▏ | 117009/371472 [9:18:47<19:08:47, 3.69it/s] 31%|███▏ | 117010/371472 [9:18:48<25:28:44, 2.77it/s] 31%|███▏ | 117011/371472 [9:18:48<23:56:56, 2.95it/s] 31%|███▏ | 117012/371472 [9:18:48<22:46:25, 3.10it/s] 31%|███▏ | 117013/371472 [9:18:49<21:38:27, 3.27it/s] 32%|███▏ | 117014/371472 [9:18:49<20:35:56, 3.43it/s] 32%|███▏ | 117015/371472 [9:18:49<21:39:22, 3.26it/s] 32%|███▏ | 117016/371472 [9:18:49<21:21:46, 3.31it/s] 32%|███▏ | 117017/371472 [9:18:50<20:54:42, 3.38it/s] 32%|███▏ | 117018/371472 [9:18:50<20:50:37, 3.39it/s] 32%|███▏ | 117019/371472 [9:18:50<19:48:29, 3.57it/s] 32%|███▏ | 117020/371472 [9:18:51<19:36:48, 3.60it/s] {'loss': 3.2856, 'learning_rate': 7.168167812277791e-07, 'epoch': 5.04} + 32%|███▏ | 117020/371472 [9:18:51<19:36:48, 3.60it/s] 32%|███▏ | 117021/371472 [9:18:51<19:00:15, 3.72it/s] 32%|███▏ | 117022/371472 [9:18:51<18:52:26, 3.74it/s] 32%|███▏ | 117023/371472 [9:18:51<19:58:26, 3.54it/s] 32%|███▏ | 117024/371472 [9:18:52<19:31:47, 3.62it/s] 32%|███▏ | 117025/371472 [9:18:52<18:49:58, 3.75it/s] 32%|███▏ | 117026/371472 [9:18:52<18:40:49, 3.78it/s] 32%|███▏ | 117027/371472 [9:18:52<18:47:20, 3.76it/s] 32%|███▏ | 117028/371472 [9:18:53<18:57:58, 3.73it/s] 32%|███▏ | 117029/371472 [9:18:53<19:38:58, 3.60it/s] 32%|███▏ | 117030/371472 [9:18:53<19:49:11, 3.57it/s] 32%|███▏ | 117031/371472 [9:18:54<18:53:59, 3.74it/s] 32%|███▏ | 117032/371472 [9:18:54<18:32:05, 3.81it/s] 32%|███▏ | 117033/371472 [9:18:54<19:53:37, 3.55it/s] 32%|███▏ | 117034/371472 [9:18:54<19:13:36, 3.68it/s] 32%|███▏ | 117035/371472 [9:18:55<19:17:41, 3.66it/s] 32%|███▏ | 117036/371472 [9:18:55<19:32:40, 3.62it/s] 32%|███▏ | 117037/371472 [9:18:55<19:16:51, 3.67it/s] 32%|███▏ | 117038/371472 [9:18:56<20:34:00, 3.44it/s] 32%|███▏ | 117039/371472 [9:18:56<21:11:16, 3.34it/s] 32%|███▏ | 117040/371472 [9:18:56<20:31:40, 3.44it/s] {'loss': 3.4699, 'learning_rate': 7.167682992523003e-07, 'epoch': 5.04} + 32%|███▏ | 117040/371472 [9:18:56<20:31:40, 3.44it/s] 32%|███▏ | 117041/371472 [9:18:56<21:16:28, 3.32it/s] 32%|███▏ | 117042/371472 [9:18:57<21:18:25, 3.32it/s] 32%|███▏ | 117043/371472 [9:18:57<21:14:43, 3.33it/s] 32%|███▏ | 117044/371472 [9:18:57<21:04:59, 3.35it/s] 32%|███▏ | 117045/371472 [9:18:58<21:32:37, 3.28it/s] 32%|███▏ | 117046/371472 [9:18:58<20:12:32, 3.50it/s] 32%|███▏ | 117047/371472 [9:18:58<19:53:51, 3.55it/s] 32%|███▏ | 117048/371472 [9:18:58<20:26:17, 3.46it/s] 32%|███▏ | 117049/371472 [9:18:59<20:39:56, 3.42it/s] 32%|███▏ | 117050/371472 [9:18:59<21:26:58, 3.29it/s] 32%|███▏ | 117051/371472 [9:18:59<21:23:25, 3.30it/s] 32%|███▏ | 117052/371472 [9:19:00<21:12:23, 3.33it/s] 32%|███▏ | 117053/371472 [9:19:00<20:39:52, 3.42it/s] 32%|███▏ | 117054/371472 [9:19:00<20:37:40, 3.43it/s] 32%|███▏ | 117055/371472 [9:19:01<20:58:03, 3.37it/s] 32%|███▏ | 117056/371472 [9:19:01<20:52:41, 3.38it/s] 32%|███▏ | 117057/371472 [9:19:01<22:16:12, 3.17it/s] 32%|███▏ | 117058/371472 [9:19:02<21:54:51, 3.22it/s] 32%|███▏ | 117059/371472 [9:19:02<20:47:45, 3.40it/s] 32%|███▏ | 117060/371472 [9:19:02<20:36:08, 3.43it/s] {'loss': 3.3669, 'learning_rate': 7.167198172768212e-07, 'epoch': 5.04} + 32%|███▏ | 117060/371472 [9:19:02<20:36:08, 3.43it/s] 32%|███▏ | 117061/371472 [9:19:02<20:15:06, 3.49it/s] 32%|███▏ | 117062/371472 [9:19:03<21:08:22, 3.34it/s] 32%|███▏ | 117063/371472 [9:19:03<20:36:01, 3.43it/s] 32%|███▏ | 117064/371472 [9:19:03<20:26:30, 3.46it/s] 32%|███▏ | 117065/371472 [9:19:03<20:01:12, 3.53it/s] 32%|███▏ | 117066/371472 [9:19:04<20:20:41, 3.47it/s] 32%|███▏ | 117067/371472 [9:19:04<19:49:26, 3.56it/s] 32%|███▏ | 117068/371472 [9:19:04<19:22:00, 3.65it/s] 32%|███▏ | 117069/371472 [9:19:05<20:25:13, 3.46it/s] 32%|███▏ | 117070/371472 [9:19:05<20:05:52, 3.52it/s] 32%|███▏ | 117071/371472 [9:19:05<19:56:45, 3.54it/s] 32%|███▏ | 117072/371472 [9:19:05<19:08:02, 3.69it/s] 32%|███▏ | 117073/371472 [9:19:06<18:37:09, 3.80it/s] 32%|███▏ | 117074/371472 [9:19:06<19:30:01, 3.62it/s] 32%|███▏ | 117075/371472 [9:19:06<18:58:57, 3.72it/s] 32%|███▏ | 117076/371472 [9:19:07<20:48:38, 3.40it/s] 32%|███▏ | 117077/371472 [9:19:07<20:32:39, 3.44it/s] 32%|███▏ | 117078/371472 [9:19:07<19:32:47, 3.62it/s] 32%|███▏ | 117079/371472 [9:19:07<19:38:42, 3.60it/s] 32%|███▏ | 117080/371472 [9:19:08<18:48:58, 3.76it/s] {'loss': 3.2544, 'learning_rate': 7.166713353013424e-07, 'epoch': 5.04} + 32%|███▏ | 117080/371472 [9:19:08<18:48:58, 3.76it/s] 32%|███▏ | 117081/371472 [9:19:08<18:56:51, 3.73it/s] 32%|███▏ | 117082/371472 [9:19:08<18:45:02, 3.77it/s] 32%|███▏ | 117083/371472 [9:19:08<19:28:19, 3.63it/s] 32%|███▏ | 117084/371472 [9:19:09<20:07:17, 3.51it/s] 32%|███▏ | 117085/371472 [9:19:09<20:37:53, 3.42it/s] 32%|███▏ | 117086/371472 [9:19:09<19:57:40, 3.54it/s] 32%|███▏ | 117087/371472 [9:19:10<20:03:43, 3.52it/s] 32%|███▏ | 117088/371472 [9:19:10<20:42:12, 3.41it/s] 32%|███▏ | 117089/371472 [9:19:10<21:08:13, 3.34it/s] 32%|███▏ | 117090/371472 [9:19:11<20:56:53, 3.37it/s] 32%|███▏ | 117091/371472 [9:19:11<20:13:11, 3.49it/s] 32%|███▏ | 117092/371472 [9:19:11<19:22:29, 3.65it/s] 32%|███▏ | 117093/371472 [9:19:11<19:01:37, 3.71it/s] 32%|███▏ | 117094/371472 [9:19:12<20:02:04, 3.53it/s] 32%|███▏ | 117095/371472 [9:19:12<19:59:10, 3.54it/s] 32%|███▏ | 117096/371472 [9:19:12<19:41:27, 3.59it/s] 32%|███▏ | 117097/371472 [9:19:12<19:36:52, 3.60it/s] 32%|███▏ | 117098/371472 [9:19:13<19:34:40, 3.61it/s] 32%|███▏ | 117099/371472 [9:19:13<19:00:33, 3.72it/s] 32%|███▏ | 117100/371472 [9:19:13<19:06:50, 3.70it/s] {'loss': 3.4742, 'learning_rate': 7.166228533258635e-07, 'epoch': 5.04} + 32%|███▏ | 117100/371472 [9:19:13<19:06:50, 3.70it/s] 32%|███▏ | 117101/371472 [9:19:14<19:40:31, 3.59it/s] 32%|███▏ | 117102/371472 [9:19:14<19:27:20, 3.63it/s] 32%|███▏ | 117103/371472 [9:19:14<19:43:00, 3.58it/s] 32%|███▏ | 117104/371472 [9:19:14<20:00:28, 3.53it/s] 32%|███▏ | 117105/371472 [9:19:15<20:56:22, 3.37it/s] 32%|███▏ | 117106/371472 [9:19:15<19:59:49, 3.53it/s] 32%|███▏ | 117107/371472 [9:19:15<20:12:04, 3.50it/s] 32%|███▏ | 117108/371472 [9:19:15<19:08:41, 3.69it/s] 32%|███▏ | 117109/371472 [9:19:16<18:52:55, 3.74it/s] 32%|███▏ | 117110/371472 [9:19:16<18:55:30, 3.73it/s] 32%|███▏ | 117111/371472 [9:19:16<18:59:54, 3.72it/s] 32%|███▏ | 117112/371472 [9:19:17<18:25:22, 3.84it/s] 32%|███▏ | 117113/371472 [9:19:17<19:04:54, 3.70it/s] 32%|███▏ | 117114/371472 [9:19:17<21:01:21, 3.36it/s] 32%|███▏ | 117115/371472 [9:19:17<20:54:48, 3.38it/s] 32%|███▏ | 117116/371472 [9:19:18<19:44:21, 3.58it/s] 32%|███▏ | 117117/371472 [9:19:18<20:54:05, 3.38it/s] 32%|███▏ | 117118/371472 [9:19:18<20:35:33, 3.43it/s] 32%|███▏ | 117119/371472 [9:19:19<20:03:32, 3.52it/s] 32%|███▏ | 117120/371472 [9:19:19<19:34:34, 3.61it/s] {'loss': 3.1644, 'learning_rate': 7.165743713503845e-07, 'epoch': 5.04} + 32%|███▏ | 117120/371472 [9:19:19<19:34:34, 3.61it/s] 32%|███▏ | 117121/371472 [9:19:19<19:24:17, 3.64it/s] 32%|███▏ | 117122/371472 [9:19:19<18:49:16, 3.75it/s] 32%|███▏ | 117123/371472 [9:19:20<19:42:51, 3.58it/s] 32%|███▏ | 117124/371472 [9:19:20<20:37:08, 3.43it/s] 32%|███▏ | 117125/371472 [9:19:20<20:10:42, 3.50it/s] 32%|███▏ | 117126/371472 [9:19:21<19:58:51, 3.54it/s] 32%|███▏ | 117127/371472 [9:19:21<19:34:00, 3.61it/s] 32%|███▏ | 117128/371472 [9:19:21<20:39:16, 3.42it/s] 32%|███▏ | 117129/371472 [9:19:21<19:54:34, 3.55it/s] 32%|███▏ | 117130/371472 [9:19:22<20:15:09, 3.49it/s] 32%|███▏ | 117131/371472 [9:19:22<20:19:13, 3.48it/s] 32%|███▏ | 117132/371472 [9:19:22<20:46:01, 3.40it/s] 32%|███▏ | 117133/371472 [9:19:23<22:46:02, 3.10it/s] 32%|███▏ | 117134/371472 [9:19:23<22:10:50, 3.19it/s] 32%|███▏ | 117135/371472 [9:19:23<20:51:15, 3.39it/s] 32%|███▏ | 117136/371472 [9:19:24<21:12:30, 3.33it/s] 32%|███▏ | 117137/371472 [9:19:24<21:54:31, 3.22it/s] 32%|███▏ | 117138/371472 [9:19:24<20:58:57, 3.37it/s] 32%|███▏ | 117139/371472 [9:19:24<20:47:17, 3.40it/s] 32%|███▏ | 117140/371472 [9:19:25<20:01:46, 3.53it/s] {'loss': 3.2146, 'learning_rate': 7.165258893749057e-07, 'epoch': 5.05} + 32%|███▏ | 117140/371472 [9:19:25<20:01:46, 3.53it/s] 32%|███▏ | 117141/371472 [9:19:25<19:46:21, 3.57it/s] 32%|███▏ | 117142/371472 [9:19:25<19:00:52, 3.72it/s] 32%|███▏ | 117143/371472 [9:19:26<19:31:07, 3.62it/s] 32%|███▏ | 117144/371472 [9:19:26<19:20:10, 3.65it/s] 32%|███▏ | 117145/371472 [9:19:26<20:27:44, 3.45it/s] 32%|███▏ | 117146/371472 [9:19:26<20:07:32, 3.51it/s] 32%|███▏ | 117147/371472 [9:19:27<20:30:42, 3.44it/s] 32%|███▏ | 117148/371472 [9:19:27<20:35:05, 3.43it/s] 32%|███▏ | 117149/371472 [9:19:27<21:21:43, 3.31it/s] 32%|███▏ | 117150/371472 [9:19:28<21:03:10, 3.36it/s] 32%|███▏ | 117151/371472 [9:19:28<20:17:58, 3.48it/s] 32%|███▏ | 117152/371472 [9:19:28<20:40:27, 3.42it/s] 32%|███▏ | 117153/371472 [9:19:28<20:19:18, 3.48it/s] 32%|███▏ | 117154/371472 [9:19:29<20:27:11, 3.45it/s] 32%|███▏ | 117155/371472 [9:19:29<19:59:57, 3.53it/s] 32%|███▏ | 117156/371472 [9:19:29<20:33:46, 3.44it/s] 32%|███▏ | 117157/371472 [9:19:30<19:47:34, 3.57it/s] 32%|███▏ | 117158/371472 [9:19:30<19:47:31, 3.57it/s] 32%|███▏ | 117159/371472 [9:19:30<20:22:25, 3.47it/s] 32%|███▏ | 117160/371472 [9:19:30<20:42:52, 3.41it/s] {'loss': 3.2629, 'learning_rate': 7.164774073994269e-07, 'epoch': 5.05} + 32%|███▏ | 117160/371472 [9:19:30<20:42:52, 3.41it/s] 32%|███▏ | 117161/371472 [9:19:31<20:01:05, 3.53it/s] 32%|███▏ | 117162/371472 [9:19:31<21:58:46, 3.21it/s] 32%|███▏ | 117163/371472 [9:19:31<22:31:08, 3.14it/s] 32%|███▏ | 117164/371472 [9:19:32<22:28:29, 3.14it/s] 32%|███▏ | 117165/371472 [9:19:32<22:09:59, 3.19it/s] 32%|███▏ | 117166/371472 [9:19:32<21:37:57, 3.27it/s] 32%|███▏ | 117167/371472 [9:19:33<20:46:08, 3.40it/s] 32%|███▏ | 117168/371472 [9:19:33<20:10:10, 3.50it/s] 32%|███▏ | 117169/371472 [9:19:33<20:50:05, 3.39it/s] 32%|███▏ | 117170/371472 [9:19:33<20:51:57, 3.39it/s] 32%|███▏ | 117171/371472 [9:19:34<20:55:13, 3.38it/s] 32%|███▏ | 117172/371472 [9:19:34<19:54:06, 3.55it/s] 32%|███▏ | 117173/371472 [9:19:34<19:59:07, 3.53it/s] 32%|███▏ | 117174/371472 [9:19:35<19:44:43, 3.58it/s] 32%|███▏ | 117175/371472 [9:19:35<19:29:35, 3.62it/s] 32%|███▏ | 117176/371472 [9:19:35<19:42:03, 3.59it/s] 32%|███▏ | 117177/371472 [9:19:35<18:53:26, 3.74it/s] 32%|███▏ | 117178/371472 [9:19:36<18:46:24, 3.76it/s] 32%|███▏ | 117179/371472 [9:19:36<20:27:40, 3.45it/s] 32%|███▏ | 117180/371472 [9:19:36<19:37:45, 3.60it/s] {'loss': 3.4091, 'learning_rate': 7.16428925423948e-07, 'epoch': 5.05} + 32%|███▏ | 117180/371472 [9:19:36<19:37:45, 3.60it/s] 32%|███▏ | 117181/371472 [9:19:37<20:44:25, 3.41it/s] 32%|███▏ | 117182/371472 [9:19:37<20:17:14, 3.48it/s] 32%|███▏ | 117183/371472 [9:19:37<21:31:10, 3.28it/s] 32%|███▏ | 117184/371472 [9:19:37<20:46:26, 3.40it/s] 32%|███▏ | 117185/371472 [9:19:38<19:44:06, 3.58it/s] 32%|███▏ | 117186/371472 [9:19:38<19:49:02, 3.56it/s] 32%|███▏ | 117187/371472 [9:19:38<21:03:41, 3.35it/s] 32%|███▏ | 117188/371472 [9:19:39<20:15:15, 3.49it/s] 32%|███▏ | 117189/371472 [9:19:39<19:43:49, 3.58it/s] 32%|███▏ | 117190/371472 [9:19:39<21:05:14, 3.35it/s] 32%|███▏ | 117191/371472 [9:19:39<19:50:50, 3.56it/s] 32%|███▏ | 117192/371472 [9:19:40<19:35:12, 3.61it/s] 32%|███▏ | 117193/371472 [9:19:40<22:17:18, 3.17it/s] 32%|███▏ | 117194/371472 [9:19:40<21:22:50, 3.30it/s] 32%|███▏ | 117195/371472 [9:19:41<21:05:04, 3.35it/s] 32%|███▏ | 117196/371472 [9:19:41<20:18:54, 3.48it/s] 32%|███▏ | 117197/371472 [9:19:41<20:22:38, 3.47it/s] 32%|███▏ | 117198/371472 [9:19:42<20:39:03, 3.42it/s] 32%|███▏ | 117199/371472 [9:19:42<19:45:22, 3.58it/s] 32%|███▏ | 117200/371472 [9:19:42<20:06:30, 3.51it/s] {'loss': 3.3521, 'learning_rate': 7.163804434484689e-07, 'epoch': 5.05} + 32%|███▏ | 117200/371472 [9:19:42<20:06:30, 3.51it/s] 32%|███▏ | 117201/371472 [9:19:42<19:40:56, 3.59it/s] 32%|███▏ | 117202/371472 [9:19:43<20:01:07, 3.53it/s] 32%|███▏ | 117203/371472 [9:19:43<20:15:49, 3.49it/s] 32%|███▏ | 117204/371472 [9:19:43<19:24:59, 3.64it/s] 32%|███▏ | 117205/371472 [9:19:43<19:21:16, 3.65it/s] 32%|███▏ | 117206/371472 [9:19:44<18:54:45, 3.73it/s] 32%|███▏ | 117207/371472 [9:19:44<18:48:51, 3.75it/s] 32%|███▏ | 117208/371472 [9:19:44<18:52:47, 3.74it/s] 32%|███▏ | 117209/371472 [9:19:45<19:41:35, 3.59it/s] 32%|███▏ | 117210/371472 [9:19:45<19:14:51, 3.67it/s] 32%|███▏ | 117211/371472 [9:19:45<19:13:40, 3.67it/s] 32%|███▏ | 117212/371472 [9:19:45<19:15:41, 3.67it/s] 32%|███▏ | 117213/371472 [9:19:46<19:31:20, 3.62it/s] 32%|███▏ | 117214/371472 [9:19:46<19:25:52, 3.63it/s] 32%|███▏ | 117215/371472 [9:19:46<18:54:53, 3.73it/s] 32%|███▏ | 117216/371472 [9:19:46<19:41:54, 3.59it/s] 32%|███▏ | 117217/371472 [9:19:47<19:55:33, 3.54it/s] 32%|███▏ | 117218/371472 [9:19:47<19:28:44, 3.63it/s] 32%|███▏ | 117219/371472 [9:19:47<20:11:47, 3.50it/s] 32%|███▏ | 117220/371472 [9:19:48<19:59:46, 3.53it/s] {'loss': 3.3163, 'learning_rate': 7.163319614729901e-07, 'epoch': 5.05} + 32%|███▏ | 117220/371472 [9:19:48<19:59:46, 3.53it/s] 32%|███▏ | 117221/371472 [9:19:48<19:08:15, 3.69it/s] 32%|███▏ | 117222/371472 [9:19:48<19:27:53, 3.63it/s] 32%|███▏ | 117223/371472 [9:19:48<19:53:49, 3.55it/s] 32%|███▏ | 117224/371472 [9:19:49<19:49:44, 3.56it/s] 32%|███▏ | 117225/371472 [9:19:49<19:36:22, 3.60it/s] 32%|███▏ | 117226/371472 [9:19:49<19:14:00, 3.67it/s] 32%|███▏ | 117227/371472 [9:19:49<19:16:04, 3.67it/s] 32%|███▏ | 117228/371472 [9:19:50<18:37:16, 3.79it/s] 32%|███▏ | 117229/371472 [9:19:50<18:43:50, 3.77it/s] 32%|███▏ | 117230/371472 [9:19:50<18:56:13, 3.73it/s] 32%|███▏ | 117231/371472 [9:19:51<20:17:59, 3.48it/s] 32%|███▏ | 117232/371472 [9:19:51<20:50:17, 3.39it/s] 32%|███▏ | 117233/371472 [9:19:51<20:14:31, 3.49it/s] 32%|███▏ | 117234/371472 [9:19:51<19:36:29, 3.60it/s] 32%|███▏ | 117235/371472 [9:19:52<20:11:21, 3.50it/s] 32%|███▏ | 117236/371472 [9:19:52<21:49:09, 3.24it/s] 32%|███▏ | 117237/371472 [9:19:52<22:20:08, 3.16it/s] 32%|███▏ | 117238/371472 [9:19:53<22:29:06, 3.14it/s] 32%|███▏ | 117239/371472 [9:19:53<22:49:58, 3.09it/s] 32%|███▏ | 117240/371472 [9:19:53<21:22:14, 3.30it/s] {'loss': 3.2432, 'learning_rate': 7.162834794975113e-07, 'epoch': 5.05} + 32%|███▏ | 117240/371472 [9:19:53<21:22:14, 3.30it/s] 32%|███▏ | 117241/371472 [9:19:54<20:35:45, 3.43it/s] 32%|███▏ | 117242/371472 [9:19:54<20:12:54, 3.49it/s] 32%|███▏ | 117243/371472 [9:19:54<20:17:51, 3.48it/s] 32%|███▏ | 117244/371472 [9:19:54<19:48:15, 3.57it/s] 32%|███▏ | 117245/371472 [9:19:55<19:25:52, 3.63it/s] 32%|███▏ | 117246/371472 [9:19:55<18:43:39, 3.77it/s] 32%|███▏ | 117247/371472 [9:19:55<20:33:08, 3.44it/s] 32%|███▏ | 117248/371472 [9:19:56<19:52:19, 3.55it/s] 32%|███▏ | 117249/371472 [9:19:56<20:18:11, 3.48it/s] 32%|███▏ | 117250/371472 [9:19:56<19:34:03, 3.61it/s] 32%|███▏ | 117251/371472 [9:19:56<19:18:09, 3.66it/s] 32%|███▏ | 117252/371472 [9:19:57<19:32:09, 3.61it/s] 32%|███▏ | 117253/371472 [9:19:57<19:17:26, 3.66it/s] 32%|███▏ | 117254/371472 [9:19:57<20:36:56, 3.43it/s] 32%|███▏ | 117255/371472 [9:19:58<20:14:38, 3.49it/s] 32%|███▏ | 117256/371472 [9:19:58<21:41:25, 3.26it/s] 32%|███▏ | 117257/371472 [9:19:58<21:42:39, 3.25it/s] 32%|███▏ | 117258/371472 [9:19:58<20:57:15, 3.37it/s] 32%|███▏ | 117259/371472 [9:19:59<20:00:37, 3.53it/s] 32%|███▏ | 117260/371472 [9:19:59<21:10:54, 3.33it/s] {'loss': 3.3028, 'learning_rate': 7.162349975220324e-07, 'epoch': 5.05} + 32%|███▏ | 117260/371472 [9:19:59<21:10:54, 3.33it/s] 32%|███▏ | 117261/371472 [9:19:59<20:23:30, 3.46it/s] 32%|███▏ | 117262/371472 [9:20:00<19:27:06, 3.63it/s] 32%|███▏ | 117263/371472 [9:20:00<19:03:12, 3.71it/s] 32%|███▏ | 117264/371472 [9:20:00<19:13:23, 3.67it/s] 32%|███▏ | 117265/371472 [9:20:00<19:33:17, 3.61it/s] 32%|███▏ | 117266/371472 [9:20:01<20:57:14, 3.37it/s] 32%|███▏ | 117267/371472 [9:20:01<21:52:18, 3.23it/s] 32%|███▏ | 117268/371472 [9:20:01<20:58:06, 3.37it/s] 32%|███▏ | 117269/371472 [9:20:02<20:17:15, 3.48it/s] 32%|███▏ | 117270/371472 [9:20:02<19:36:44, 3.60it/s] 32%|███▏ | 117271/371472 [9:20:02<20:27:11, 3.45it/s] 32%|███▏ | 117272/371472 [9:20:02<19:56:03, 3.54it/s] 32%|███▏ | 117273/371472 [9:20:03<19:30:08, 3.62it/s] 32%|███▏ | 117274/371472 [9:20:03<20:34:15, 3.43it/s] 32%|███▏ | 117275/371472 [9:20:03<20:42:41, 3.41it/s] 32%|███▏ | 117276/371472 [9:20:04<20:37:19, 3.42it/s] 32%|███▏ | 117277/371472 [9:20:04<20:30:32, 3.44it/s] 32%|███▏ | 117278/371472 [9:20:04<19:35:52, 3.60it/s] 32%|███▏ | 117279/371472 [9:20:04<20:26:05, 3.46it/s] 32%|███▏ | 117280/371472 [9:20:05<20:41:27, 3.41it/s] {'loss': 3.3965, 'learning_rate': 7.161865155465534e-07, 'epoch': 5.05} + 32%|███▏ | 117280/371472 [9:20:05<20:41:27, 3.41it/s] 32%|███▏ | 117281/371472 [9:20:05<20:25:20, 3.46it/s] 32%|███▏ | 117282/371472 [9:20:05<20:26:08, 3.46it/s] 32%|███▏ | 117283/371472 [9:20:06<20:50:21, 3.39it/s] 32%|███▏ | 117284/371472 [9:20:06<20:26:09, 3.46it/s] 32%|███▏ | 117285/371472 [9:20:06<19:30:57, 3.62it/s] 32%|███▏ | 117286/371472 [9:20:06<20:04:46, 3.52it/s] 32%|███▏ | 117287/371472 [9:20:07<19:50:05, 3.56it/s] 32%|███▏ | 117288/371472 [9:20:07<19:24:46, 3.64it/s] 32%|███▏ | 117289/371472 [9:20:07<19:03:44, 3.70it/s] 32%|███▏ | 117290/371472 [9:20:08<18:49:06, 3.75it/s] 32%|███▏ | 117291/371472 [9:20:08<19:07:56, 3.69it/s] 32%|███▏ | 117292/371472 [9:20:08<19:01:13, 3.71it/s] 32%|███▏ | 117293/371472 [9:20:08<19:11:20, 3.68it/s] 32%|███▏ | 117294/371472 [9:20:09<21:10:13, 3.34it/s] 32%|███▏ | 117295/371472 [9:20:09<21:19:59, 3.31it/s] 32%|███▏ | 117296/371472 [9:20:09<20:42:02, 3.41it/s] 32%|███▏ | 117297/371472 [9:20:10<21:28:44, 3.29it/s] 32%|███▏ | 117298/371472 [9:20:10<20:47:42, 3.40it/s] 32%|███▏ | 117299/371472 [9:20:10<20:31:02, 3.44it/s] 32%|███▏ | 117300/371472 [9:20:10<20:15:37, 3.48it/s] {'loss': 3.3166, 'learning_rate': 7.161380335710746e-07, 'epoch': 5.05} + 32%|███▏ | 117300/371472 [9:20:10<20:15:37, 3.48it/s] 32%|███▏ | 117301/371472 [9:20:11<20:33:21, 3.43it/s] 32%|███▏ | 117302/371472 [9:20:11<19:51:21, 3.56it/s] 32%|███▏ | 117303/371472 [9:20:11<19:29:42, 3.62it/s] 32%|███▏ | 117304/371472 [9:20:12<19:14:54, 3.67it/s] 32%|███▏ | 117305/371472 [9:20:12<18:42:11, 3.77it/s] 32%|███▏ | 117306/371472 [9:20:12<18:40:44, 3.78it/s] 32%|███▏ | 117307/371472 [9:20:12<18:22:37, 3.84it/s] 32%|███▏ | 117308/371472 [9:20:13<18:28:44, 3.82it/s] 32%|███▏ | 117309/371472 [9:20:13<17:58:02, 3.93it/s] 32%|███▏ | 117310/371472 [9:20:13<18:27:20, 3.83it/s] 32%|███▏ | 117311/371472 [9:20:13<18:24:58, 3.83it/s] 32%|███▏ | 117312/371472 [9:20:14<19:26:00, 3.63it/s] 32%|███▏ | 117313/371472 [9:20:14<19:47:28, 3.57it/s] 32%|███▏ | 117314/371472 [9:20:14<19:27:58, 3.63it/s] 32%|███▏ | 117315/371472 [9:20:15<20:15:35, 3.48it/s] 32%|███▏ | 117316/371472 [9:20:15<19:31:22, 3.62it/s] 32%|███▏ | 117317/371472 [9:20:15<19:33:31, 3.61it/s] 32%|███▏ | 117318/371472 [9:20:15<22:00:27, 3.21it/s] 32%|███▏ | 117319/371472 [9:20:16<21:16:30, 3.32it/s] 32%|███▏ | 117320/371472 [9:20:16<20:18:55, 3.48it/s] {'loss': 3.4106, 'learning_rate': 7.160895515955957e-07, 'epoch': 5.05} + 32%|███▏ | 117320/371472 [9:20:16<20:18:55, 3.48it/s] 32%|███▏ | 117321/371472 [9:20:16<20:16:21, 3.48it/s] 32%|███▏ | 117322/371472 [9:20:17<19:57:49, 3.54it/s] 32%|███▏ | 117323/371472 [9:20:17<19:57:52, 3.54it/s] 32%|███▏ | 117324/371472 [9:20:17<19:04:38, 3.70it/s] 32%|███▏ | 117325/371472 [9:20:17<19:50:49, 3.56it/s] 32%|███▏ | 117326/371472 [9:20:18<20:42:23, 3.41it/s] 32%|███▏ | 117327/371472 [9:20:18<20:18:23, 3.48it/s] 32%|███▏ | 117328/371472 [9:20:18<20:34:26, 3.43it/s] 32%|███▏ | 117329/371472 [9:20:19<20:18:06, 3.48it/s] 32%|███▏ | 117330/371472 [9:20:19<20:18:06, 3.48it/s] 32%|███▏ | 117331/371472 [9:20:19<19:28:27, 3.63it/s] 32%|███▏ | 117332/371472 [9:20:19<19:14:09, 3.67it/s] 32%|███▏ | 117333/371472 [9:20:20<18:29:57, 3.82it/s] 32%|███▏ | 117334/371472 [9:20:20<18:35:17, 3.80it/s] 32%|███▏ | 117335/371472 [9:20:20<18:53:15, 3.74it/s] 32%|███▏ | 117336/371472 [9:20:20<20:27:31, 3.45it/s] 32%|███▏ | 117337/371472 [9:20:21<20:36:29, 3.43it/s] 32%|███▏ | 117338/371472 [9:20:21<20:05:38, 3.51it/s] 32%|███▏ | 117339/371472 [9:20:21<19:42:38, 3.58it/s] 32%|███▏ | 117340/371472 [9:20:22<19:58:54, 3.53it/s] {'loss': 3.4457, 'learning_rate': 7.160410696201167e-07, 'epoch': 5.05} + 32%|███▏ | 117340/371472 [9:20:22<19:58:54, 3.53it/s] 32%|███▏ | 117341/371472 [9:20:22<21:25:38, 3.29it/s] 32%|███▏ | 117342/371472 [9:20:22<20:26:15, 3.45it/s] 32%|███▏ | 117343/371472 [9:20:22<19:54:10, 3.55it/s] 32%|███▏ | 117344/371472 [9:20:23<19:21:18, 3.65it/s] 32%|███▏ | 117345/371472 [9:20:23<20:18:32, 3.48it/s] 32%|███▏ | 117346/371472 [9:20:23<20:06:28, 3.51it/s] 32%|███▏ | 117347/371472 [9:20:24<20:26:53, 3.45it/s] 32%|███▏ | 117348/371472 [9:20:24<19:30:19, 3.62it/s] 32%|███▏ | 117349/371472 [9:20:24<20:02:55, 3.52it/s] 32%|███▏ | 117350/371472 [9:20:24<20:36:03, 3.43it/s] 32%|███▏ | 117351/371472 [9:20:25<20:20:23, 3.47it/s] 32%|███▏ | 117352/371472 [9:20:25<21:26:19, 3.29it/s] 32%|███▏ | 117353/371472 [9:20:25<20:35:25, 3.43it/s] 32%|███▏ | 117354/371472 [9:20:26<20:20:35, 3.47it/s] 32%|███▏ | 117355/371472 [9:20:26<19:44:44, 3.57it/s] 32%|███▏ | 117356/371472 [9:20:26<19:22:42, 3.64it/s] 32%|███▏ | 117357/371472 [9:20:26<19:41:17, 3.59it/s] 32%|███▏ | 117358/371472 [9:20:27<19:49:50, 3.56it/s] 32%|███▏ | 117359/371472 [9:20:27<19:29:53, 3.62it/s] 32%|███▏ | 117360/371472 [9:20:27<21:16:01, 3.32it/s] {'loss': 3.209, 'learning_rate': 7.159925876446378e-07, 'epoch': 5.05} + 32%|███▏ | 117360/371472 [9:20:27<21:16:01, 3.32it/s] 32%|███▏ | 117361/371472 [9:20:28<20:07:56, 3.51it/s] 32%|███▏ | 117362/371472 [9:20:28<19:11:58, 3.68it/s] 32%|███▏ | 117363/371472 [9:20:28<19:35:36, 3.60it/s] 32%|███▏ | 117364/371472 [9:20:28<19:16:02, 3.66it/s] 32%|███▏ | 117365/371472 [9:20:29<18:43:07, 3.77it/s] 32%|███▏ | 117366/371472 [9:20:29<18:29:01, 3.82it/s] 32%|███▏ | 117367/371472 [9:20:29<18:49:07, 3.75it/s] 32%|███▏ | 117368/371472 [9:20:29<19:17:38, 3.66it/s] 32%|███▏ | 117369/371472 [9:20:30<19:02:11, 3.71it/s] 32%|███▏ | 117370/371472 [9:20:30<18:56:25, 3.73it/s] 32%|███▏ | 117371/371472 [9:20:30<20:08:49, 3.50it/s] 32%|███�� | 117372/371472 [9:20:31<19:17:59, 3.66it/s] 32%|███▏ | 117373/371472 [9:20:31<19:32:42, 3.61it/s] 32%|███▏ | 117374/371472 [9:20:31<19:21:07, 3.65it/s] 32%|███▏ | 117375/371472 [9:20:31<21:12:28, 3.33it/s] 32%|███▏ | 117376/371472 [9:20:32<20:38:24, 3.42it/s] 32%|███▏ | 117377/371472 [9:20:32<20:21:48, 3.47it/s] 32%|███▏ | 117378/371472 [9:20:32<19:59:03, 3.53it/s] 32%|███▏ | 117379/371472 [9:20:33<19:18:37, 3.66it/s] 32%|███▏ | 117380/371472 [9:20:33<19:10:09, 3.68it/s] {'loss': 3.4142, 'learning_rate': 7.15944105669159e-07, 'epoch': 5.06} + 32%|███▏ | 117380/371472 [9:20:33<19:10:09, 3.68it/s] 32%|███▏ | 117381/371472 [9:20:33<20:00:18, 3.53it/s] 32%|███▏ | 117382/371472 [9:20:33<19:39:39, 3.59it/s] 32%|███▏ | 117383/371472 [9:20:34<20:56:10, 3.37it/s] 32%|███▏ | 117384/371472 [9:20:34<23:24:37, 3.01it/s] 32%|███▏ | 117385/371472 [9:20:34<21:53:11, 3.22it/s] 32%|███▏ | 117386/371472 [9:20:35<21:37:35, 3.26it/s] 32%|███▏ | 117387/371472 [9:20:35<21:58:05, 3.21it/s] 32%|███▏ | 117388/371472 [9:20:35<20:46:41, 3.40it/s] 32%|███▏ | 117389/371472 [9:20:36<20:56:56, 3.37it/s] 32%|███▏ | 117390/371472 [9:20:36<20:51:13, 3.38it/s] 32%|███▏ | 117391/371472 [9:20:36<20:08:16, 3.50it/s] 32%|███▏ | 117392/371472 [9:20:36<19:28:38, 3.62it/s] 32%|███▏ | 117393/371472 [9:20:37<19:34:58, 3.60it/s] 32%|███▏ | 117394/371472 [9:20:37<19:56:53, 3.54it/s] 32%|███▏ | 117395/371472 [9:20:37<19:33:25, 3.61it/s] 32%|███▏ | 117396/371472 [9:20:38<20:14:25, 3.49it/s] 32%|███▏ | 117397/371472 [9:20:38<21:09:29, 3.34it/s] 32%|███▏ | 117398/371472 [9:20:38<21:04:31, 3.35it/s] 32%|███▏ | 117399/371472 [9:20:38<19:48:41, 3.56it/s] 32%|███▏ | 117400/371472 [9:20:39<19:43:48, 3.58it/s] {'loss': 3.3758, 'learning_rate': 7.158956236936801e-07, 'epoch': 5.06} + 32%|███▏ | 117400/371472 [9:20:39<19:43:48, 3.58it/s] 32%|███▏ | 117401/371472 [9:20:39<19:03:46, 3.70it/s] 32%|███▏ | 117402/371472 [9:20:39<19:05:43, 3.70it/s] 32%|███▏ | 117403/371472 [9:20:40<19:57:37, 3.54it/s] 32%|███▏ | 117404/371472 [9:20:40<19:29:03, 3.62it/s] 32%|███▏ | 117405/371472 [9:20:40<18:55:17, 3.73it/s] 32%|███▏ | 117406/371472 [9:20:40<21:00:47, 3.36it/s] 32%|███▏ | 117407/371472 [9:20:41<20:16:53, 3.48it/s] 32%|███▏ | 117408/371472 [9:20:41<19:50:30, 3.56it/s] 32%|███▏ | 117409/371472 [9:20:41<19:30:48, 3.62it/s] 32%|███▏ | 117410/371472 [9:20:42<20:24:46, 3.46it/s] 32%|███▏ | 117411/371472 [9:20:42<20:35:40, 3.43it/s] 32%|███▏ | 117412/371472 [9:20:42<19:50:54, 3.56it/s] 32%|███▏ | 117413/371472 [9:20:42<20:46:32, 3.40it/s] 32%|███▏ | 117414/371472 [9:20:43<20:26:05, 3.45it/s] 32%|███▏ | 117415/371472 [9:20:43<20:32:30, 3.44it/s] 32%|███▏ | 117416/371472 [9:20:43<19:57:23, 3.54it/s] 32%|███▏ | 117417/371472 [9:20:44<20:02:56, 3.52it/s] 32%|███▏ | 117418/371472 [9:20:44<19:52:40, 3.55it/s] 32%|███▏ | 117419/371472 [9:20:44<19:40:05, 3.59it/s] 32%|███▏ | 117420/371472 [9:20:44<20:01:56, 3.52it/s] {'loss': 3.4906, 'learning_rate': 7.158471417182011e-07, 'epoch': 5.06} + 32%|███▏ | 117420/371472 [9:20:44<20:01:56, 3.52it/s] 32%|███▏ | 117421/371472 [9:20:45<19:50:59, 3.56it/s] 32%|███▏ | 117422/371472 [9:20:45<19:30:38, 3.62it/s] 32%|███▏ | 117423/371472 [9:20:45<20:14:55, 3.49it/s] 32%|███▏ | 117424/371472 [9:20:45<19:32:59, 3.61it/s] 32%|███▏ | 117425/371472 [9:20:46<22:04:00, 3.20it/s] 32%|███▏ | 117426/371472 [9:20:46<21:01:24, 3.36it/s] 32%|███▏ | 117427/371472 [9:20:46<22:22:22, 3.15it/s] 32%|███▏ | 117428/371472 [9:20:47<21:16:41, 3.32it/s] 32%|███▏ | 117429/371472 [9:20:47<20:24:30, 3.46it/s] 32%|███▏ | 117430/371472 [9:20:47<19:55:24, 3.54it/s] 32%|███▏ | 117431/371472 [9:20:48<20:31:04, 3.44it/s] 32%|███▏ | 117432/371472 [9:20:48<19:37:43, 3.60it/s] 32%|███▏ | 117433/371472 [9:20:48<20:07:37, 3.51it/s] 32%|███▏ | 117434/371472 [9:20:49<22:46:36, 3.10it/s] 32%|███▏ | 117435/371472 [9:20:49<21:22:33, 3.30it/s] 32%|███▏ | 117436/371472 [9:20:49<21:03:11, 3.35it/s] 32%|███▏ | 117437/371472 [9:20:49<20:48:57, 3.39it/s] 32%|███▏ | 117438/371472 [9:20:50<20:43:45, 3.40it/s] 32%|███▏ | 117439/371472 [9:20:50<21:06:35, 3.34it/s] 32%|███▏ | 117440/371472 [9:20:50<20:27:29, 3.45it/s] {'loss': 3.3155, 'learning_rate': 7.157986597427223e-07, 'epoch': 5.06} + 32%|███▏ | 117440/371472 [9:20:50<20:27:29, 3.45it/s] 32%|███▏ | 117441/371472 [9:20:51<21:02:17, 3.35it/s] 32%|███▏ | 117442/371472 [9:20:51<20:39:15, 3.42it/s] 32%|███▏ | 117443/371472 [9:20:51<20:37:28, 3.42it/s] 32%|███▏ | 117444/371472 [9:20:51<20:36:40, 3.42it/s] 32%|███▏ | 117445/371472 [9:20:52<20:40:17, 3.41it/s] 32%|███▏ | 117446/371472 [9:20:52<19:58:47, 3.53it/s] 32%|███▏ | 117447/371472 [9:20:52<21:25:12, 3.29it/s] 32%|███▏ | 117448/371472 [9:20:53<21:09:07, 3.34it/s] 32%|███▏ | 117449/371472 [9:20:53<21:55:45, 3.22it/s] 32%|███▏ | 117450/371472 [9:20:53<20:35:07, 3.43it/s] 32%|███▏ | 117451/371472 [9:20:53<20:01:07, 3.52it/s] 32%|███▏ | 117452/371472 [9:20:54<19:29:56, 3.62it/s] 32%|███▏ | 117453/371472 [9:20:54<20:35:05, 3.43it/s] 32%|███▏ | 117454/371472 [9:20:54<20:15:06, 3.48it/s] 32%|███▏ | 117455/371472 [9:20:55<19:19:19, 3.65it/s] 32%|███▏ | 117456/371472 [9:20:55<20:30:01, 3.44it/s] 32%|███▏ | 117457/371472 [9:20:55<21:00:11, 3.36it/s] 32%|███▏ | 117458/371472 [9:20:56<20:34:23, 3.43it/s] 32%|███▏ | 117459/371472 [9:20:56<20:10:47, 3.50it/s] 32%|███▏ | 117460/371472 [9:20:56<20:33:03, 3.43it/s] {'loss': 3.2834, 'learning_rate': 7.157501777672433e-07, 'epoch': 5.06} + 32%|███▏ | 117460/371472 [9:20:56<20:33:03, 3.43it/s] 32%|███▏ | 117461/371472 [9:20:56<21:20:47, 3.31it/s] 32%|███▏ | 117462/371472 [9:20:57<21:34:25, 3.27it/s] 32%|███▏ | 117463/371472 [9:20:57<21:04:48, 3.35it/s] 32%|███▏ | 117464/371472 [9:20:57<21:36:10, 3.27it/s] 32%|███▏ | 117465/371472 [9:20:58<21:07:04, 3.34it/s] 32%|███▏ | 117466/371472 [9:20:58<20:27:12, 3.45it/s] 32%|███▏ | 117467/371472 [9:20:58<20:23:47, 3.46it/s] 32%|███▏ | 117468/371472 [9:20:58<20:15:15, 3.48it/s] 32%|███▏ | 117469/371472 [9:20:59<19:24:04, 3.64it/s] 32%|███▏ | 117470/371472 [9:20:59<19:59:45, 3.53it/s] 32%|███▏ | 117471/371472 [9:20:59<20:04:43, 3.51it/s] 32%|███▏ | 117472/371472 [9:21:00<19:40:53, 3.58it/s] 32%|███▏ | 117473/371472 [9:21:00<19:11:12, 3.68it/s] 32%|███▏ | 117474/371472 [9:21:00<19:02:50, 3.70it/s] 32%|███▏ | 117475/371472 [9:21:00<19:13:05, 3.67it/s] 32%|███▏ | 117476/371472 [9:21:01<19:26:56, 3.63it/s] 32%|███▏ | 117477/371472 [9:21:01<18:59:02, 3.72it/s] 32%|███▏ | 117478/371472 [9:21:01<19:17:13, 3.66it/s] 32%|███▏ | 117479/371472 [9:21:01<19:58:26, 3.53it/s] 32%|███▏ | 117480/371472 [9:21:02<19:43:49, 3.58it/s] {'loss': 3.2455, 'learning_rate': 7.157016957917645e-07, 'epoch': 5.06} + 32%|███▏ | 117480/371472 [9:21:02<19:43:49, 3.58it/s] 32%|███▏ | 117481/371472 [9:21:02<19:43:58, 3.58it/s] 32%|███▏ | 117482/371472 [9:21:02<20:47:31, 3.39it/s] 32%|███▏ | 117483/371472 [9:21:03<20:21:14, 3.47it/s] 32%|███▏ | 117484/371472 [9:21:03<22:13:22, 3.17it/s] 32%|███▏ | 117485/371472 [9:21:03<20:54:41, 3.37it/s] 32%|███▏ | 117486/371472 [9:21:04<20:51:55, 3.38it/s] 32%|███▏ | 117487/371472 [9:21:04<20:22:49, 3.46it/s] 32%|███▏ | 117488/371472 [9:21:04<21:12:22, 3.33it/s] 32%|███▏ | 117489/371472 [9:21:04<20:14:12, 3.49it/s] 32%|███▏ | 117490/371472 [9:21:05<20:08:23, 3.50it/s] 32%|███▏ | 117491/371472 [9:21:05<19:42:20, 3.58it/s] 32%|███▏ | 117492/371472 [9:21:05<19:39:06, 3.59it/s] 32%|███▏ | 117493/371472 [9:21:06<19:19:46, 3.65it/s] 32%|███▏ | 117494/371472 [9:21:06<18:50:10, 3.75it/s] 32%|███▏ | 117495/371472 [9:21:06<19:35:28, 3.60it/s] 32%|███▏ | 117496/371472 [9:21:06<19:10:23, 3.68it/s] 32%|███▏ | 117497/371472 [9:21:07<19:30:10, 3.62it/s] 32%|███▏ | 117498/371472 [9:21:07<19:50:22, 3.56it/s] 32%|███▏ | 117499/371472 [9:21:07<20:13:32, 3.49it/s] 32%|███▏ | 117500/371472 [9:21:07<20:13:10, 3.49it/s] {'loss': 3.2418, 'learning_rate': 7.156532138162855e-07, 'epoch': 5.06} + 32%|███▏ | 117500/371472 [9:21:07<20:13:10, 3.49it/s] 32%|███▏ | 117501/371472 [9:21:08<20:04:36, 3.51it/s] 32%|███▏ | 117502/371472 [9:21:08<19:46:29, 3.57it/s] 32%|███▏ | 117503/371472 [9:21:08<20:13:20, 3.49it/s] 32%|███▏ | 117504/371472 [9:21:09<20:18:33, 3.47it/s] 32%|███▏ | 117505/371472 [9:21:09<19:27:18, 3.63it/s] 32%|███▏ | 117506/371472 [9:21:09<19:37:06, 3.60it/s] 32%|███▏ | 117507/371472 [9:21:09<19:11:46, 3.67it/s] 32%|███▏ | 117508/371472 [9:21:10<19:14:42, 3.67it/s] 32%|███▏ | 117509/371472 [9:21:10<20:30:18, 3.44it/s] 32%|███▏ | 117510/371472 [9:21:10<21:37:08, 3.26it/s] 32%|███▏ | 117511/371472 [9:21:11<20:38:17, 3.42it/s] 32%|███▏ | 117512/371472 [9:21:11<19:52:44, 3.55it/s] 32%|███▏ | 117513/371472 [9:21:11<20:01:41, 3.52it/s] 32%|███▏ | 117514/371472 [9:21:11<19:46:46, 3.57it/s] 32%|███▏ | 117515/371472 [9:21:12<19:24:21, 3.64it/s] 32%|███▏ | 117516/371472 [9:21:12<19:14:53, 3.66it/s] 32%|███▏ | 117517/371472 [9:21:12<19:11:55, 3.67it/s] 32%|███▏ | 117518/371472 [9:21:13<19:10:57, 3.68it/s] 32%|███▏ | 117519/371472 [9:21:13<18:56:42, 3.72it/s] 32%|███▏ | 117520/371472 [9:21:13<20:24:42, 3.46it/s] {'loss': 3.369, 'learning_rate': 7.156047318408067e-07, 'epoch': 5.06} + 32%|███▏ | 117520/371472 [9:21:13<20:24:42, 3.46it/s] 32%|███▏ | 117521/371472 [9:21:13<20:43:56, 3.40it/s] 32%|███▏ | 117522/371472 [9:21:14<20:06:57, 3.51it/s] 32%|███▏ | 117523/371472 [9:21:14<21:23:40, 3.30it/s] 32%|███▏ | 117524/371472 [9:21:14<20:36:35, 3.42it/s] 32%|███▏ | 117525/371472 [9:21:15<20:00:22, 3.53it/s] 32%|███▏ | 117526/371472 [9:21:15<19:11:08, 3.68it/s] 32%|███▏ | 117527/371472 [9:21:15<19:29:15, 3.62it/s] 32%|███▏ | 117528/371472 [9:21:15<20:03:22, 3.52it/s] 32%|███▏ | 117529/371472 [9:21:16<20:35:14, 3.43it/s] 32%|███▏ | 117530/371472 [9:21:16<19:50:33, 3.55it/s] 32%|███▏ | 117531/371472 [9:21:16<21:20:57, 3.30it/s] 32%|███▏ | 117532/371472 [9:21:17<22:35:43, 3.12it/s] 32%|███▏ | 117533/371472 [9:21:17<21:34:43, 3.27it/s] 32%|███▏ | 117534/371472 [9:21:17<21:45:43, 3.24it/s] 32%|███▏ | 117535/371472 [9:21:18<22:41:06, 3.11it/s] 32%|███▏ | 117536/371472 [9:21:18<21:15:10, 3.32it/s] 32%|███▏ | 117537/371472 [9:21:18<20:38:51, 3.42it/s] 32%|███▏ | 117538/371472 [9:21:18<20:08:13, 3.50it/s] 32%|███▏ | 117539/371472 [9:21:19<19:46:03, 3.57it/s] 32%|███▏ | 117540/371472 [9:21:19<19:31:16, 3.61it/s] {'loss': 3.35, 'learning_rate': 7.155562498653279e-07, 'epoch': 5.06} + 32%|███▏ | 117540/371472 [9:21:19<19:31:16, 3.61it/s] 32%|███▏ | 117541/371472 [9:21:19<19:32:53, 3.61it/s] 32%|███▏ | 117542/371472 [9:21:20<22:15:02, 3.17it/s] 32%|███▏ | 117543/371472 [9:21:20<21:02:08, 3.35it/s] 32%|███▏ | 117544/371472 [9:21:20<22:17:05, 3.17it/s] 32%|███▏ | 117545/371472 [9:21:21<21:43:48, 3.25it/s] 32%|███▏ | 117546/371472 [9:21:21<21:03:20, 3.35it/s] 32%|███▏ | 117547/371472 [9:21:21<22:50:53, 3.09it/s] 32%|███▏ | 117548/371472 [9:21:21<21:46:08, 3.24it/s] 32%|███▏ | 117549/371472 [9:21:22<21:04:56, 3.35it/s] 32%|███▏ | 117550/371472 [9:21:22<21:40:29, 3.25it/s] 32%|███▏ | 117551/371472 [9:21:22<21:39:25, 3.26it/s] 32%|███▏ | 117552/371472 [9:21:23<20:35:08, 3.43it/s] 32%|███▏ | 117553/371472 [9:21:23<20:55:37, 3.37it/s] 32%|███▏ | 117554/371472 [9:21:23<20:07:09, 3.51it/s] 32%|███▏ | 117555/371472 [9:21:23<19:49:09, 3.56it/s] 32%|███▏ | 117556/371472 [9:21:24<19:36:20, 3.60it/s] 32%|███▏ | 117557/371472 [9:21:24<19:26:19, 3.63it/s] 32%|███▏ | 117558/371472 [9:21:24<19:46:37, 3.57it/s] 32%|███▏ | 117559/371472 [9:21:25<20:52:49, 3.38it/s] 32%|███▏ | 117560/371472 [9:21:25<20:29:20, 3.44it/s] {'loss': 3.3948, 'learning_rate': 7.15507767889849e-07, 'epoch': 5.06} + 32%|███▏ | 117560/371472 [9:21:25<20:29:20, 3.44it/s] 32%|███▏ | 117561/371472 [9:21:25<21:16:53, 3.31it/s] 32%|███▏ | 117562/371472 [9:21:26<20:54:04, 3.37it/s] 32%|███▏ | 117563/371472 [9:21:26<20:09:27, 3.50it/s] 32%|███▏ | 117564/371472 [9:21:26<20:09:56, 3.50it/s] 32%|███▏ | 117565/371472 [9:21:26<19:55:01, 3.54it/s] 32%|███▏ | 117566/371472 [9:21:27<20:36:31, 3.42it/s] 32%|███▏ | 117567/371472 [9:21:27<20:19:52, 3.47it/s] 32%|███▏ | 117568/371472 [9:21:27<19:42:07, 3.58it/s] 32%|███▏ | 117569/371472 [9:21:27<19:51:39, 3.55it/s] 32%|███▏ | 117570/371472 [9:21:28<19:47:20, 3.56it/s] 32%|███▏ | 117571/371472 [9:21:28<19:56:45, 3.54it/s] 32%|███▏ | 117572/371472 [9:21:28<20:44:59, 3.40it/s] 32%|███▏ | 117573/371472 [9:21:29<21:42:18, 3.25it/s] 32%|███▏ | 117574/371472 [9:21:29<20:32:15, 3.43it/s] 32%|███▏ | 117575/371472 [9:21:29<20:24:58, 3.45it/s] 32%|███▏ | 117576/371472 [9:21:30<20:33:05, 3.43it/s] 32%|███▏ | 117577/371472 [9:21:30<19:59:44, 3.53it/s] 32%|███▏ | 117578/371472 [9:21:30<19:38:28, 3.59it/s] 32%|███▏ | 117579/371472 [9:21:30<19:17:17, 3.66it/s] 32%|███▏ | 117580/371472 [9:21:31<20:11:45, 3.49it/s] {'loss': 3.1273, 'learning_rate': 7.154592859143699e-07, 'epoch': 5.06} + 32%|███▏ | 117580/371472 [9:21:31<20:11:45, 3.49it/s] 32%|███▏ | 117581/371472 [9:21:31<20:16:59, 3.48it/s] 32%|███▏ | 117582/371472 [9:21:31<20:44:36, 3.40it/s] 32%|███▏ | 117583/371472 [9:21:32<20:44:12, 3.40it/s] 32%|███▏ | 117584/371472 [9:21:32<20:48:50, 3.39it/s] 32%|███▏ | 117585/371472 [9:21:32<19:56:08, 3.54it/s] 32%|███▏ | 117586/371472 [9:21:32<19:50:56, 3.55it/s] 32%|███▏ | 117587/371472 [9:21:33<19:34:49, 3.60it/s] 32%|███▏ | 117588/371472 [9:21:33<19:12:04, 3.67it/s] 32%|███▏ | 117589/371472 [9:21:33<22:17:12, 3.16it/s] 32%|███▏ | 117590/371472 [9:21:34<21:15:24, 3.32it/s] 32%|███▏ | 117591/371472 [9:21:34<21:28:13, 3.28it/s] 32%|███▏ | 117592/371472 [9:21:34<21:28:20, 3.28it/s] 32%|███▏ | 117593/371472 [9:21:35<21:20:23, 3.30it/s] 32%|███▏ | 117594/371472 [9:21:35<23:07:55, 3.05it/s] 32%|███▏ | 117595/371472 [9:21:35<22:30:39, 3.13it/s] 32%|███▏ | 117596/371472 [9:21:35<21:23:13, 3.30it/s] 32%|███▏ | 117597/371472 [9:21:36<21:53:49, 3.22it/s] 32%|███▏ | 117598/371472 [9:21:36<21:03:17, 3.35it/s] 32%|███▏ | 117599/371472 [9:21:36<20:11:03, 3.49it/s] 32%|███▏ | 117600/371472 [9:21:37<21:26:35, 3.29it/s] {'loss': 3.3413, 'learning_rate': 7.154108039388911e-07, 'epoch': 5.07} + 32%|███▏ | 117600/371472 [9:21:37<21:26:35, 3.29it/s] 32%|███▏ | 117601/371472 [9:21:37<20:44:35, 3.40it/s] 32%|███▏ | 117602/371472 [9:21:37<19:40:28, 3.58it/s] 32%|███▏ | 117603/371472 [9:21:37<20:27:56, 3.45it/s] 32%|███▏ | 117604/371472 [9:21:38<21:58:35, 3.21it/s] 32%|███▏ | 117605/371472 [9:21:38<22:19:35, 3.16it/s] 32%|███▏ | 117606/371472 [9:21:38<22:05:53, 3.19it/s] 32%|███▏ | 117607/371472 [9:21:39<20:43:30, 3.40it/s] 32%|███▏ | 117608/371472 [9:21:39<20:48:39, 3.39it/s] 32%|███▏ | 117609/371472 [9:21:39<20:32:41, 3.43it/s] 32%|███▏ | 117610/371472 [9:21:40<20:51:39, 3.38it/s] 32%|███▏ | 117611/371472 [9:21:40<20:31:27, 3.44it/s] 32%|███▏ | 117612/371472 [9:21:40<21:43:38, 3.25it/s] 32%|███▏ | 117613/371472 [9:21:41<21:44:59, 3.24it/s] 32%|███▏ | 117614/371472 [9:21:41<21:16:20, 3.31it/s] 32%|███▏ | 117615/371472 [9:21:41<21:08:06, 3.34it/s] 32%|███▏ | 117616/371472 [9:21:41<21:11:20, 3.33it/s] 32%|███▏ | 117617/371472 [9:21:42<20:13:24, 3.49it/s] 32%|███▏ | 117618/371472 [9:21:42<20:19:24, 3.47it/s] 32%|███▏ | 117619/371472 [9:21:42<20:20:54, 3.47it/s] 32%|███▏ | 117620/371472 [9:21:43<21:06:11, 3.34it/s] {'loss': 3.2372, 'learning_rate': 7.153623219634123e-07, 'epoch': 5.07} + 32%|███▏ | 117620/371472 [9:21:43<21:06:11, 3.34it/s] 32%|███▏ | 117621/371472 [9:21:43<20:26:33, 3.45it/s] 32%|███▏ | 117622/371472 [9:21:43<19:49:39, 3.56it/s] 32%|███▏ | 117623/371472 [9:21:43<19:30:35, 3.61it/s] 32%|███▏ | 117624/371472 [9:21:44<20:13:58, 3.49it/s] 32%|███▏ | 117625/371472 [9:21:44<20:09:12, 3.50it/s] 32%|███▏ | 117626/371472 [9:21:44<19:19:30, 3.65it/s] 32%|███▏ | 117627/371472 [9:21:44<18:48:00, 3.75it/s] 32%|███▏ | 117628/371472 [9:21:45<20:12:43, 3.49it/s] 32%|███▏ | 117629/371472 [9:21:45<20:58:55, 3.36it/s] 32%|███▏ | 117630/371472 [9:21:45<19:56:03, 3.54it/s] 32%|███▏ | 117631/371472 [9:21:46<21:07:29, 3.34it/s] 32%|███▏ | 117632/371472 [9:21:46<20:32:56, 3.43it/s] 32%|███▏ | 117633/371472 [9:21:46<19:51:23, 3.55it/s] 32%|███▏ | 117634/371472 [9:21:47<19:38:45, 3.59it/s] 32%|███▏ | 117635/371472 [9:21:47<19:49:19, 3.56it/s] 32%|███▏ | 117636/371472 [9:21:47<20:51:26, 3.38it/s] 32%|███▏ | 117637/371472 [9:21:47<20:03:15, 3.52it/s] 32%|███▏ | 117638/371472 [9:21:48<19:31:10, 3.61it/s] 32%|███▏ | 117639/371472 [9:21:48<19:12:53, 3.67it/s] 32%|███▏ | 117640/371472 [9:21:48<20:36:00, 3.42it/s] {'loss': 3.216, 'learning_rate': 7.153138399879334e-07, 'epoch': 5.07} + 32%|███▏ | 117640/371472 [9:21:48<20:36:00, 3.42it/s] 32%|███▏ | 117641/371472 [9:21:49<21:16:24, 3.31it/s] 32%|███▏ | 117642/371472 [9:21:49<21:04:23, 3.35it/s] 32%|███▏ | 117643/371472 [9:21:49<19:55:42, 3.54it/s] 32%|███▏ | 117644/371472 [9:21:49<20:11:27, 3.49it/s] 32%|███▏ | 117645/371472 [9:21:50<19:53:00, 3.55it/s] 32%|███▏ | 117646/371472 [9:21:50<19:25:59, 3.63it/s] 32%|███▏ | 117647/371472 [9:21:50<21:15:16, 3.32it/s] 32%|███▏ | 117648/371472 [9:21:51<22:36:04, 3.12it/s] 32%|███▏ | 117649/371472 [9:21:51<21:23:38, 3.30it/s] 32%|███▏ | 117650/371472 [9:21:51<20:20:10, 3.47it/s] 32%|███▏ | 117651/371472 [9:21:52<24:57:54, 2.82it/s] 32%|███▏ | 117652/371472 [9:21:52<23:33:10, 2.99it/s] 32%|███▏ | 117653/371472 [9:21:52<21:58:35, 3.21it/s] 32%|███▏ | 117654/371472 [9:21:53<21:39:38, 3.25it/s] 32%|███▏ | 117655/371472 [9:21:53<21:00:02, 3.36it/s] 32%|███▏ | 117656/371472 [9:21:53<21:09:41, 3.33it/s] 32%|███▏ | 117657/371472 [9:21:53<21:43:12, 3.25it/s] 32%|███▏ | 117658/371472 [9:21:54<22:00:16, 3.20it/s] 32%|███▏ | 117659/371472 [9:21:54<21:28:06, 3.28it/s] 32%|███▏ | 117660/371472 [9:21:54<21:07:04, 3.34it/s] {'loss': 3.141, 'learning_rate': 7.152653580124544e-07, 'epoch': 5.07} + 32%|███▏ | 117660/371472 [9:21:54<21:07:04, 3.34it/s] 32%|███▏ | 117661/371472 [9:21:55<20:11:02, 3.49it/s] 32%|███▏ | 117662/371472 [9:21:55<21:55:40, 3.22it/s] 32%|███▏ | 117663/371472 [9:21:55<20:49:28, 3.39it/s] 32%|███▏ | 117664/371472 [9:21:56<20:17:25, 3.47it/s] 32%|███▏ | 117665/371472 [9:21:56<19:28:21, 3.62it/s] 32%|███▏ | 117666/371472 [9:21:56<19:07:24, 3.69it/s] 32%|███▏ | 117667/371472 [9:21:56<19:24:47, 3.63it/s] 32%|███▏ | 117668/371472 [9:21:57<21:12:16, 3.32it/s] 32%|███▏ | 117669/371472 [9:21:57<21:16:11, 3.31it/s] 32%|███▏ | 117670/371472 [9:21:57<20:05:21, 3.51it/s] 32%|███▏ | 117671/371472 [9:21:58<20:42:34, 3.40it/s] 32%|███▏ | 117672/371472 [9:21:58<20:47:57, 3.39it/s] 32%|███▏ | 117673/371472 [9:21:58<21:01:46, 3.35it/s] 32%|███▏ | 117674/371472 [9:21:58<21:30:19, 3.28it/s] 32%|███▏ | 117675/371472 [9:21:59<21:42:19, 3.25it/s] 32%|███▏ | 117676/371472 [9:21:59<21:10:09, 3.33it/s] 32%|███▏ | 117677/371472 [9:21:59<20:11:51, 3.49it/s] 32%|███▏ | 117678/371472 [9:22:00<22:01:59, 3.20it/s] 32%|███▏ | 117679/371472 [9:22:00<22:28:30, 3.14it/s] 32%|███▏ | 117680/371472 [9:22:00<21:24:51, 3.29it/s] {'loss': 3.3666, 'learning_rate': 7.152168760369756e-07, 'epoch': 5.07} + 32%|███▏ | 117680/371472 [9:22:00<21:24:51, 3.29it/s] 32%|███▏ | 117681/371472 [9:22:01<21:06:43, 3.34it/s] 32%|███▏ | 117682/371472 [9:22:01<20:34:14, 3.43it/s] 32%|███▏ | 117683/371472 [9:22:01<21:29:45, 3.28it/s] 32%|███▏ | 117684/371472 [9:22:01<21:03:28, 3.35it/s] 32%|███▏ | 117685/371472 [9:22:02<20:05:40, 3.51it/s] 32%|███▏ | 117686/371472 [9:22:02<19:48:16, 3.56it/s] 32%|███▏ | 117687/371472 [9:22:02<19:46:24, 3.57it/s] 32%|███▏ | 117688/371472 [9:22:03<19:19:54, 3.65it/s] 32%|███▏ | 117689/371472 [9:22:03<19:07:45, 3.69it/s] 32%|███▏ | 117690/371472 [9:22:03<19:04:11, 3.70it/s] 32%|███▏ | 117691/371472 [9:22:03<18:57:39, 3.72it/s] 32%|███▏ | 117692/371472 [9:22:04<18:52:03, 3.74it/s] 32%|███▏ | 117693/371472 [9:22:04<20:17:17, 3.47it/s] 32%|███▏ | 117694/371472 [9:22:04<20:13:25, 3.49it/s] 32%|███▏ | 117695/371472 [9:22:04<19:29:44, 3.62it/s] 32%|███▏ | 117696/371472 [9:22:05<19:17:40, 3.65it/s] 32%|███▏ | 117697/371472 [9:22:05<19:42:24, 3.58it/s] 32%|███▏ | 117698/371472 [9:22:05<19:44:46, 3.57it/s] 32%|███▏ | 117699/371472 [9:22:06<19:22:14, 3.64it/s] 32%|███▏ | 117700/371472 [9:22:06<20:04:40, 3.51it/s] {'loss': 3.2647, 'learning_rate': 7.151683940614967e-07, 'epoch': 5.07} + 32%|███▏ | 117700/371472 [9:22:06<20:04:40, 3.51it/s] 32%|███▏ | 117701/371472 [9:22:06<20:05:55, 3.51it/s] 32%|███▏ | 117702/371472 [9:22:06<19:55:33, 3.54it/s] 32%|███▏ | 117703/371472 [9:22:07<20:36:25, 3.42it/s] 32%|███▏ | 117704/371472 [9:22:07<21:20:06, 3.30it/s] 32%|███▏ | 117705/371472 [9:22:07<20:15:53, 3.48it/s] 32%|███▏ | 117706/371472 [9:22:08<20:28:50, 3.44it/s] 32%|███▏ | 117707/371472 [9:22:08<20:31:51, 3.43it/s] 32%|███▏ | 117708/371472 [9:22:08<20:13:38, 3.48it/s] 32%|███▏ | 117709/371472 [9:22:09<20:38:44, 3.41it/s] 32%|███▏ | 117710/371472 [9:22:09<19:50:12, 3.55it/s] 32%|███▏ | 117711/371472 [9:22:09<20:06:18, 3.51it/s] 32%|███▏ | 117712/371472 [9:22:09<20:09:38, 3.50it/s] 32%|███▏ | 117713/371472 [9:22:10<19:49:38, 3.56it/s] 32%|███▏ | 117714/371472 [9:22:10<20:35:34, 3.42it/s] 32%|███▏ | 117715/371472 [9:22:10<20:14:37, 3.48it/s] 32%|███▏ | 117716/371472 [9:22:10<19:49:06, 3.56it/s] 32%|███▏ | 117717/371472 [9:22:11<20:35:28, 3.42it/s] 32%|███▏ | 117718/371472 [9:22:11<20:03:44, 3.51it/s] 32%|███▏ | 117719/371472 [9:22:11<20:03:45, 3.51it/s] 32%|███▏ | 117720/371472 [9:22:12<19:51:26, 3.55it/s] {'loss': 3.2725, 'learning_rate': 7.151199120860177e-07, 'epoch': 5.07} + 32%|███▏ | 117720/371472 [9:22:12<19:51:26, 3.55it/s] 32%|███▏ | 117721/371472 [9:22:12<19:21:23, 3.64it/s] 32%|███▏ | 117722/371472 [9:22:12<18:55:49, 3.72it/s] 32%|███▏ | 117723/371472 [9:22:12<19:09:03, 3.68it/s] 32%|███▏ | 117724/371472 [9:22:13<19:08:32, 3.68it/s] 32%|███▏ | 117725/371472 [9:22:13<19:03:28, 3.70it/s] 32%|███▏ | 117726/371472 [9:22:13<19:04:41, 3.69it/s] 32%|███▏ | 117727/371472 [9:22:14<20:44:53, 3.40it/s] 32%|███▏ | 117728/371472 [9:22:14<20:05:14, 3.51it/s] 32%|███▏ | 117729/371472 [9:22:14<19:44:42, 3.57it/s] 32%|███▏ | 117730/371472 [9:22:14<19:40:12, 3.58it/s] 32%|███▏ | 117731/371472 [9:22:15<18:56:49, 3.72it/s] 32%|███▏ | 117732/371472 [9:22:15<20:02:54, 3.52it/s] 32%|███▏ | 117733/371472 [9:22:15<20:08:18, 3.50it/s] 32%|███▏ | 117734/371472 [9:22:16<20:05:59, 3.51it/s] 32%|███▏ | 117735/371472 [9:22:16<19:53:59, 3.54it/s] 32%|███▏ | 117736/371472 [9:22:16<20:32:23, 3.43it/s] 32%|███▏ | 117737/371472 [9:22:16<19:51:58, 3.55it/s] 32%|███▏ | 117738/371472 [9:22:17<22:07:33, 3.19it/s] 32%|███▏ | 117739/371472 [9:22:17<24:54:03, 2.83it/s] 32%|███▏ | 117740/371472 [9:22:17<23:35:28, 2.99it/s] {'loss': 3.2857, 'learning_rate': 7.150714301105388e-07, 'epoch': 5.07} + 32%|███▏ | 117740/371472 [9:22:17<23:35:28, 2.99it/s] 32%|███▏ | 117741/371472 [9:22:18<22:15:37, 3.17it/s] 32%|███▏ | 117742/371472 [9:22:18<22:28:02, 3.14it/s] 32%|███▏ | 117743/371472 [9:22:18<21:46:28, 3.24it/s] 32%|███▏ | 117744/371472 [9:22:19<21:04:42, 3.34it/s] 32%|███▏ | 117745/371472 [9:22:19<20:45:58, 3.39it/s] 32%|███▏ | 117746/371472 [9:22:19<20:21:28, 3.46it/s] 32%|███▏ | 117747/371472 [9:22:19<20:02:09, 3.52it/s] 32%|███▏ | 117748/371472 [9:22:20<20:07:19, 3.50it/s] 32%|███▏ | 117749/371472 [9:22:20<21:40:48, 3.25it/s] 32%|███▏ | 117750/371472 [9:22:20<21:57:51, 3.21it/s] 32%|███▏ | 117751/371472 [9:22:21<21:16:13, 3.31it/s] 32%|███▏ | 117752/371472 [9:22:21<20:40:03, 3.41it/s] 32%|███▏ | 117753/371472 [9:22:21<21:06:47, 3.34it/s] 32%|███▏ | 117754/371472 [9:22:22<20:35:53, 3.42it/s] 32%|███▏ | 117755/371472 [9:22:22<19:54:40, 3.54it/s] 32%|███▏ | 117756/371472 [9:22:22<20:59:01, 3.36it/s] 32%|███▏ | 117757/371472 [9:22:22<20:28:10, 3.44it/s] 32%|███▏ | 117758/371472 [9:22:23<20:03:59, 3.51it/s] 32%|███▏ | 117759/371472 [9:22:23<19:08:04, 3.68it/s] 32%|███▏ | 117760/371472 [9:22:23<18:45:10, 3.76it/s] {'loss': 3.3427, 'learning_rate': 7.1502294813506e-07, 'epoch': 5.07} + 32%|███▏ | 117760/371472 [9:22:23<18:45:10, 3.76it/s] 32%|███▏ | 117761/371472 [9:22:24<19:53:43, 3.54it/s] 32%|███▏ | 117762/371472 [9:22:24<19:31:34, 3.61it/s] 32%|███▏ | 117763/371472 [9:22:24<20:22:34, 3.46it/s] 32%|███▏ | 117764/371472 [9:22:24<19:36:14, 3.59it/s] 32%|███▏ | 117765/371472 [9:22:25<18:58:45, 3.71it/s] 32%|███▏ | 117766/371472 [9:22:25<18:35:18, 3.79it/s] 32%|███▏ | 117767/371472 [9:22:25<18:19:18, 3.85it/s] 32%|███▏ | 117768/371472 [9:22:25<17:45:18, 3.97it/s] 32%|███▏ | 117769/371472 [9:22:26<18:55:10, 3.72it/s] 32%|███▏ | 117770/371472 [9:22:26<18:45:48, 3.76it/s] 32%|███▏ | 117771/371472 [9:22:26<18:20:17, 3.84it/s] 32%|███▏ | 117772/371472 [9:22:26<18:06:38, 3.89it/s] 32%|███▏ | 117773/371472 [9:22:27<18:25:58, 3.82it/s] 32%|███▏ | 117774/371472 [9:22:27<18:33:12, 3.80it/s] 32%|███▏ | 117775/371472 [9:22:27<18:36:28, 3.79it/s] 32%|███▏ | 117776/371472 [9:22:27<18:37:05, 3.79it/s] 32%|███▏ | 117777/371472 [9:22:28<18:40:42, 3.77it/s] 32%|███▏ | 117778/371472 [9:22:28<18:47:49, 3.75it/s] 32%|███▏ | 117779/371472 [9:22:28<19:27:49, 3.62it/s] 32%|███▏ | 117780/371472 [9:22:29<19:20:54, 3.64it/s] {'loss': 3.4027, 'learning_rate': 7.149744661595812e-07, 'epoch': 5.07} + 32%|███▏ | 117780/371472 [9:22:29<19:20:54, 3.64it/s] 32%|███▏ | 117781/371472 [9:22:29<18:54:09, 3.73it/s] 32%|███▏ | 117782/371472 [9:22:29<18:56:27, 3.72it/s] 32%|███▏ | 117783/371472 [9:22:29<18:27:00, 3.82it/s] 32%|███▏ | 117784/371472 [9:22:30<18:58:19, 3.71it/s] 32%|███▏ | 117785/371472 [9:22:30<19:08:34, 3.68it/s] 32%|███▏ | 117786/371472 [9:22:30<18:26:49, 3.82it/s] 32%|███▏ | 117787/371472 [9:22:30<18:28:43, 3.81it/s] 32%|███▏ | 117788/371472 [9:22:31<21:07:37, 3.34it/s] 32%|███▏ | 117789/371472 [9:22:31<22:01:59, 3.20it/s] 32%|███▏ | 117790/371472 [9:22:31<20:59:50, 3.36it/s] 32%|███▏ | 117791/371472 [9:22:32<20:25:52, 3.45it/s] 32%|███▏ | 117792/371472 [9:22:32<20:20:50, 3.46it/s] 32%|███▏ | 117793/371472 [9:22:32<20:16:46, 3.47it/s] 32%|███▏ | 117794/371472 [9:22:33<20:41:12, 3.41it/s] 32%|███▏ | 117795/371472 [9:22:33<20:09:32, 3.50it/s] 32%|███▏ | 117796/371472 [9:22:33<19:53:05, 3.54it/s] 32%|███▏ | 117797/371472 [9:22:33<19:19:58, 3.64it/s] 32%|███▏ | 117798/371472 [9:22:34<20:18:03, 3.47it/s] 32%|███▏ | 117799/371472 [9:22:34<21:03:32, 3.35it/s] 32%|███▏ | 117800/371472 [9:22:34<20:20:52, 3.46it/s] {'loss': 3.2014, 'learning_rate': 7.149259841841022e-07, 'epoch': 5.07} + 32%|███▏ | 117800/371472 [9:22:34<20:20:52, 3.46it/s] 32%|███▏ | 117801/371472 [9:22:35<19:40:01, 3.58it/s] 32%|███▏ | 117802/371472 [9:22:35<19:28:14, 3.62it/s] 32%|███▏ | 117803/371472 [9:22:35<20:04:55, 3.51it/s] 32%|███▏ | 117804/371472 [9:22:35<19:39:52, 3.58it/s] 32%|███▏ | 117805/371472 [9:22:36<21:35:32, 3.26it/s] 32%|███▏ | 117806/371472 [9:22:36<20:38:06, 3.41it/s] 32%|███▏ | 117807/371472 [9:22:36<20:13:20, 3.48it/s] 32%|███▏ | 117808/371472 [9:22:37<21:28:11, 3.28it/s] 32%|███▏ | 117809/371472 [9:22:37<21:26:54, 3.29it/s] 32%|███▏ | 117810/371472 [9:22:37<20:59:56, 3.36it/s] 32%|███▏ | 117811/371472 [9:22:37<19:51:07, 3.55it/s] 32%|███▏ | 117812/371472 [9:22:38<19:44:20, 3.57it/s] 32%|███▏ | 117813/371472 [9:22:38<19:57:58, 3.53it/s] 32%|███▏ | 117814/371472 [9:22:38<20:55:04, 3.37it/s] 32%|███▏ | 117815/371472 [9:22:39<20:10:14, 3.49it/s] 32%|███▏ | 117816/371472 [9:22:39<20:33:33, 3.43it/s] 32%|███▏ | 117817/371472 [9:22:39<21:14:33, 3.32it/s] 32%|███▏ | 117818/371472 [9:22:40<20:22:57, 3.46it/s] 32%|███▏ | 117819/371472 [9:22:40<20:11:25, 3.49it/s] 32%|███▏ | 117820/371472 [9:22:40<19:40:15, 3.58it/s] {'loss': 3.3583, 'learning_rate': 7.148775022086233e-07, 'epoch': 5.07} + 32%|███▏ | 117820/371472 [9:22:40<19:40:15, 3.58it/s] 32%|███▏ | 117821/371472 [9:22:40<20:51:25, 3.38it/s] 32%|███▏ | 117822/371472 [9:22:41<21:03:12, 3.35it/s] 32%|███▏ | 117823/371472 [9:22:41<21:31:27, 3.27it/s] 32%|███▏ | 117824/371472 [9:22:41<20:29:03, 3.44it/s] 32%|███▏ | 117825/371472 [9:22:42<20:39:50, 3.41it/s] 32%|███▏ | 117826/371472 [9:22:42<21:20:32, 3.30it/s] 32%|███▏ | 117827/371472 [9:22:42<21:27:48, 3.28it/s] 32%|███▏ | 117828/371472 [9:22:42<20:53:56, 3.37it/s] 32%|███▏ | 117829/371472 [9:22:43<21:24:10, 3.29it/s] 32%|███▏ | 117830/371472 [9:22:43<21:11:58, 3.32it/s] 32%|███▏ | 117831/371472 [9:22:43<20:40:48, 3.41it/s] 32%|███▏ | 117832/371472 [9:22:44<20:13:45, 3.48it/s] 32%|███▏ | 117833/371472 [9:22:44<21:27:09, 3.28it/s] 32%|███▏ | 117834/371472 [9:22:44<21:19:42, 3.30it/s] 32%|███▏ | 117835/371472 [9:22:45<22:00:48, 3.20it/s] 32%|███▏ | 117836/371472 [9:22:45<20:55:40, 3.37it/s] 32%|███▏ | 117837/371472 [9:22:45<20:37:34, 3.42it/s] 32%|███▏ | 117838/371472 [9:22:45<20:51:26, 3.38it/s] 32%|███▏ | 117839/371472 [9:22:46<20:11:23, 3.49it/s] 32%|███▏ | 117840/371472 [9:22:46<20:03:09, 3.51it/s] {'loss': 3.1608, 'learning_rate': 7.148290202331444e-07, 'epoch': 5.08} + 32%|███▏ | 117840/371472 [9:22:46<20:03:09, 3.51it/s] 32%|███▏ | 117841/371472 [9:22:46<19:21:06, 3.64it/s] 32%|███▏ | 117842/371472 [9:22:47<21:28:16, 3.28it/s] 32%|███▏ | 117843/371472 [9:22:47<20:47:25, 3.39it/s] 32%|███▏ | 117844/371472 [9:22:47<20:07:27, 3.50it/s] 32%|███▏ | 117845/371472 [9:22:47<20:13:49, 3.48it/s] 32%|███▏ | 117846/371472 [9:22:48<19:52:35, 3.54it/s] 32%|███▏ | 117847/371472 [9:22:48<19:22:30, 3.64it/s] 32%|███▏ | 117848/371472 [9:22:48<20:25:32, 3.45it/s] 32%|███▏ | 117849/371472 [9:22:49<20:33:39, 3.43it/s] 32%|███▏ | 117850/371472 [9:22:49<21:58:47, 3.21it/s] 32%|███▏ | 117851/371472 [9:22:49<21:05:25, 3.34it/s] 32%|███▏ | 117852/371472 [9:22:50<20:29:45, 3.44it/s] 32%|███▏ | 117853/371472 [9:22:50<20:21:04, 3.46it/s] 32%|███▏ | 117854/371472 [9:22:50<19:58:56, 3.53it/s] 32%|███▏ | 117855/371472 [9:22:50<20:32:01, 3.43it/s] 32%|███▏ | 117856/371472 [9:22:51<20:01:16, 3.52it/s] 32%|███▏ | 117857/371472 [9:22:51<20:15:04, 3.48it/s] 32%|███▏ | 117858/371472 [9:22:51<20:14:54, 3.48it/s] 32%|███▏ | 117859/371472 [9:22:51<19:41:58, 3.58it/s] 32%|███▏ | 117860/371472 [9:22:52<19:34:57, 3.60it/s] {'loss': 3.1793, 'learning_rate': 7.147805382576656e-07, 'epoch': 5.08} + 32%|███▏ | 117860/371472 [9:22:52<19:34:57, 3.60it/s] 32%|███▏ | 117861/371472 [9:22:52<19:36:26, 3.59it/s] 32%|███▏ | 117862/371472 [9:22:52<19:31:02, 3.61it/s] 32%|███▏ | 117863/371472 [9:22:53<18:59:05, 3.71it/s] 32%|███▏ | 117864/371472 [9:22:53<19:07:15, 3.68it/s] 32%|███▏ | 117865/371472 [9:22:53<19:01:50, 3.70it/s] 32%|███▏ | 117866/371472 [9:22:53<18:52:37, 3.73it/s] 32%|███▏ | 117867/371472 [9:22:54<19:20:14, 3.64it/s] 32%|███▏ | 117868/371472 [9:22:54<19:41:37, 3.58it/s] 32%|███▏ | 117869/371472 [9:22:54<19:19:20, 3.65it/s] 32%|███▏ | 117870/371472 [9:22:54<19:03:08, 3.70it/s] 32%|███▏ | 117871/371472 [9:22:55<18:45:34, 3.76it/s] 32%|███▏ | 117872/371472 [9:22:55<19:49:52, 3.55it/s] 32%|███▏ | 117873/371472 [9:22:55<19:34:25, 3.60it/s] 32%|███▏ | 117874/371472 [9:22:56<19:27:50, 3.62it/s] 32%|███▏ | 117875/371472 [9:22:56<20:38:09, 3.41it/s] 32%|███▏ | 117876/371472 [9:22:56<20:24:29, 3.45it/s] 32%|███▏ | 117877/371472 [9:22:56<20:03:21, 3.51it/s] 32%|███▏ | 117878/371472 [9:22:57<22:00:23, 3.20it/s] 32%|███▏ | 117879/371472 [9:22:57<22:25:32, 3.14it/s] 32%|███▏ | 117880/371472 [9:22:58<23:48:24, 2.96it/s] {'loss': 3.2232, 'learning_rate': 7.147320562821865e-07, 'epoch': 5.08} + 32%|███▏ | 117880/371472 [9:22:58<23:48:24, 2.96it/s] 32%|███▏ | 117881/371472 [9:22:58<22:41:32, 3.10it/s] 32%|███▏ | 117882/371472 [9:22:58<21:59:42, 3.20it/s] 32%|███▏ | 117883/371472 [9:22:58<21:31:36, 3.27it/s] 32%|███▏ | 117884/371472 [9:22:59<21:04:39, 3.34it/s] 32%|███▏ | 117885/371472 [9:22:59<20:02:18, 3.52it/s] 32%|███▏ | 117886/371472 [9:22:59<19:32:35, 3.60it/s] 32%|███▏ | 117887/371472 [9:22:59<18:58:45, 3.71it/s] 32%|███▏ | 117888/371472 [9:23:00<19:05:21, 3.69it/s] 32%|███▏ | 117889/371472 [9:23:00<18:46:56, 3.75it/s] 32%|███▏ | 117890/371472 [9:23:00<19:31:07, 3.61it/s] 32%|███▏ | 117891/371472 [9:23:01<19:22:33, 3.64it/s] 32%|███▏ | 117892/371472 [9:23:01<19:57:49, 3.53it/s] 32%|███▏ | 117893/371472 [9:23:01<19:28:08, 3.62it/s] 32%|███▏ | 117894/371472 [9:23:01<19:08:56, 3.68it/s] 32%|███▏ | 117895/371472 [9:23:02<18:49:11, 3.74it/s] 32%|███▏ | 117896/371472 [9:23:02<19:07:16, 3.68it/s] 32%|███▏ | 117897/371472 [9:23:02<20:16:03, 3.48it/s] 32%|███▏ | 117898/371472 [9:23:03<19:23:06, 3.63it/s] 32%|███▏ | 117899/371472 [9:23:03<21:54:19, 3.22it/s] 32%|███▏ | 117900/371472 [9:23:03<21:54:33, 3.21it/s] {'loss': 3.3778, 'learning_rate': 7.146835743067077e-07, 'epoch': 5.08} + 32%|███▏ | 117900/371472 [9:23:03<21:54:33, 3.21it/s] 32%|███▏ | 117901/371472 [9:23:03<20:48:39, 3.38it/s] 32%|███▏ | 117902/371472 [9:23:04<20:47:57, 3.39it/s] 32%|███▏ | 117903/371472 [9:23:04<20:07:11, 3.50it/s] 32%|███▏ | 117904/371472 [9:23:04<19:17:04, 3.65it/s] 32%|███▏ | 117905/371472 [9:23:05<18:38:33, 3.78it/s] 32%|███▏ | 117906/371472 [9:23:05<19:20:59, 3.64it/s] 32%|███▏ | 117907/371472 [9:23:05<19:17:12, 3.65it/s] 32%|███▏ | 117908/371472 [9:23:05<20:04:10, 3.51it/s] 32%|███▏ | 117909/371472 [9:23:06<19:42:56, 3.57it/s] 32%|███▏ | 117910/371472 [9:23:06<18:58:44, 3.71it/s] 32%|███▏ | 117911/371472 [9:23:06<19:03:31, 3.70it/s] 32%|███▏ | 117912/371472 [9:23:06<19:29:06, 3.61it/s] 32%|███▏ | 117913/371472 [9:23:07<19:44:20, 3.57it/s] 32%|███▏ | 117914/371472 [9:23:07<19:09:49, 3.68it/s] 32%|███▏ | 117915/371472 [9:23:07<19:22:26, 3.64it/s] 32%|███▏ | 117916/371472 [9:23:08<19:33:13, 3.60it/s] 32%|███▏ | 117917/371472 [9:23:08<18:58:27, 3.71it/s] 32%|███▏ | 117918/371472 [9:23:08<20:05:17, 3.51it/s] 32%|███▏ | 117919/371472 [9:23:08<20:04:18, 3.51it/s] 32%|███▏ | 117920/371472 [9:23:09<20:12:19, 3.49it/s] {'loss': 3.4455, 'learning_rate': 7.146350923312289e-07, 'epoch': 5.08} + 32%|███▏ | 117920/371472 [9:23:09<20:12:19, 3.49it/s] 32%|███▏ | 117921/371472 [9:23:09<19:42:50, 3.57it/s] 32%|███▏ | 117922/371472 [9:23:09<20:25:02, 3.45it/s] 32%|███▏ | 117923/371472 [9:23:10<20:14:44, 3.48it/s] 32%|███▏ | 117924/371472 [9:23:10<19:36:33, 3.59it/s] 32%|███▏ | 117925/371472 [9:23:10<20:48:54, 3.38it/s] 32%|███▏ | 117926/371472 [9:23:10<20:30:14, 3.43it/s] 32%|███▏ | 117927/371472 [9:23:11<19:37:40, 3.59it/s] 32%|███▏ | 117928/371472 [9:23:11<19:30:06, 3.61it/s] 32%|███▏ | 117929/371472 [9:23:11<19:09:40, 3.68it/s] 32%|███▏ | 117930/371472 [9:23:12<18:35:45, 3.79it/s] 32%|███▏ | 117931/371472 [9:23:12<18:57:01, 3.72it/s] 32%|███▏ | 117932/371472 [9:23:12<19:27:08, 3.62it/s] 32%|███▏ | 117933/371472 [9:23:12<19:23:18, 3.63it/s] 32%|███▏ | 117934/371472 [9:23:13<19:29:23, 3.61it/s] 32%|███▏ | 117935/371472 [9:23:13<19:50:35, 3.55it/s] 32%|███▏ | 117936/371472 [9:23:13<20:58:54, 3.36it/s] 32%|███▏ | 117937/371472 [9:23:14<20:11:41, 3.49it/s] 32%|███▏ | 117938/371472 [9:23:14<20:22:47, 3.46it/s] 32%|███▏ | 117939/371472 [9:23:14<20:26:50, 3.44it/s] 32%|███▏ | 117940/371472 [9:23:14<20:09:28, 3.49it/s] {'loss': 3.2943, 'learning_rate': 7.1458661035575e-07, 'epoch': 5.08} + 32%|███▏ | 117940/371472 [9:23:14<20:09:28, 3.49it/s] 32%|███▏ | 117941/371472 [9:23:15<19:33:03, 3.60it/s] 32%|███▏ | 117942/371472 [9:23:15<19:16:23, 3.65it/s] 32%|███▏ | 117943/371472 [9:23:15<19:42:54, 3.57it/s] 32%|███▏ | 117944/371472 [9:23:15<19:23:04, 3.63it/s] 32%|███▏ | 117945/371472 [9:23:16<19:14:23, 3.66it/s] 32%|███▏ | 117946/371472 [9:23:16<18:51:36, 3.73it/s] 32%|███▏ | 117947/371472 [9:23:16<18:46:23, 3.75it/s] 32%|███▏ | 117948/371472 [9:23:17<18:46:54, 3.75it/s] 32%|███▏ | 117949/371472 [9:23:17<18:23:21, 3.83it/s] 32%|███▏ | 117950/371472 [9:23:17<18:09:22, 3.88it/s] 32%|███▏ | 117951/371472 [9:23:17<17:38:26, 3.99it/s] 32%|███▏ | 117952/371472 [9:23:18<17:23:50, 4.05it/s] 32%|███▏ | 117953/371472 [9:23:18<17:38:53, 3.99it/s] 32%|███▏ | 117954/371472 [9:23:18<18:26:34, 3.82it/s] 32%|███▏ | 117955/371472 [9:23:18<18:50:12, 3.74it/s] 32%|███▏ | 117956/371472 [9:23:19<19:01:45, 3.70it/s] 32%|███▏ | 117957/371472 [9:23:19<18:42:55, 3.76it/s] 32%|███▏ | 117958/371472 [9:23:19<18:08:46, 3.88it/s] 32%|███▏ | 117959/371472 [9:23:19<17:47:31, 3.96it/s] 32%|███▏ | 117960/371472 [9:23:20<18:26:03, 3.82it/s] {'loss': 3.3868, 'learning_rate': 7.145381283802709e-07, 'epoch': 5.08} + 32%|███▏ | 117960/371472 [9:23:20<18:26:03, 3.82it/s] 32%|███▏ | 117961/371472 [9:23:20<18:18:03, 3.85it/s] 32%|███▏ | 117962/371472 [9:23:20<19:28:50, 3.61it/s] 32%|███▏ | 117963/371472 [9:23:21<20:20:07, 3.46it/s] 32%|███▏ | 117964/371472 [9:23:21<19:53:49, 3.54it/s] 32%|███▏ | 117965/371472 [9:23:21<20:47:59, 3.39it/s] 32%|███▏ | 117966/371472 [9:23:21<20:28:11, 3.44it/s] 32%|███▏ | 117967/371472 [9:23:22<20:31:52, 3.43it/s] 32%|███▏ | 117968/371472 [9:23:22<21:43:57, 3.24it/s] 32%|███▏ | 117969/371472 [9:23:22<20:39:30, 3.41it/s] 32%|███▏ | 117970/371472 [9:23:23<19:30:32, 3.61it/s] 32%|███▏ | 117971/371472 [9:23:23<20:07:08, 3.50it/s] 32%|███▏ | 117972/371472 [9:23:23<20:37:32, 3.41it/s] 32%|███▏ | 117973/371472 [9:23:23<19:54:39, 3.54it/s] 32%|███▏ | 117974/371472 [9:23:24<19:38:07, 3.59it/s] 32%|███▏ | 117975/371472 [9:23:24<20:53:26, 3.37it/s] 32%|███▏ | 117976/371472 [9:23:24<21:16:19, 3.31it/s] 32%|███▏ | 117977/371472 [9:23:25<20:24:51, 3.45it/s] 32%|███▏ | 117978/371472 [9:23:25<20:19:59, 3.46it/s] 32%|███▏ | 117979/371472 [9:23:25<19:39:30, 3.58it/s] 32%|███▏ | 117980/371472 [9:23:25<19:11:34, 3.67it/s] {'loss': 3.2209, 'learning_rate': 7.144896464047921e-07, 'epoch': 5.08} + 32%|███▏ | 117980/371472 [9:23:25<19:11:34, 3.67it/s] 32%|███▏ | 117981/371472 [9:23:26<18:57:21, 3.71it/s] 32%|███▏ | 117982/371472 [9:23:26<19:23:47, 3.63it/s] 32%|███▏ | 117983/371472 [9:23:26<19:10:55, 3.67it/s] 32%|███▏ | 117984/371472 [9:23:26<18:56:16, 3.72it/s] 32%|███▏ | 117985/371472 [9:23:27<19:36:27, 3.59it/s] 32%|███▏ | 117986/371472 [9:23:27<19:23:43, 3.63it/s] 32%|███▏ | 117987/371472 [9:23:27<19:40:47, 3.58it/s] 32%|███▏ | 117988/371472 [9:23:28<19:48:05, 3.56it/s] 32%|███▏ | 117989/371472 [9:23:28<20:22:09, 3.46it/s] 32%|███▏ | 117990/371472 [9:23:28<20:06:03, 3.50it/s] 32%|███▏ | 117991/371472 [9:23:28<20:38:18, 3.41it/s] 32%|███▏ | 117992/371472 [9:23:29<20:39:50, 3.41it/s] 32%|███▏ | 117993/371472 [9:23:29<21:48:21, 3.23it/s] 32%|███▏ | 117994/371472 [9:23:29<20:45:18, 3.39it/s] 32%|███▏ | 117995/371472 [9:23:30<19:51:34, 3.55it/s] 32%|███▏ | 117996/371472 [9:23:30<21:36:23, 3.26it/s] 32%|███▏ | 117997/371472 [9:23:30<20:05:49, 3.50it/s] 32%|███▏ | 117998/371472 [9:23:31<21:14:51, 3.31it/s] 32%|███▏ | 117999/371472 [9:23:31<20:32:44, 3.43it/s] 32%|███▏ | 118000/371472 [9:23:31<19:22:34, 3.63it/s] {'loss': 3.1395, 'learning_rate': 7.144411644293133e-07, 'epoch': 5.08} + 32%|███▏ | 118000/371472 [9:23:31<19:22:34, 3.63it/s] 32%|███▏ | 118001/371472 [9:23:31<19:25:24, 3.62it/s] 32%|███▏ | 118002/371472 [9:23:32<18:45:35, 3.75it/s] 32%|███▏ | 118003/371472 [9:23:32<18:23:06, 3.83it/s] 32%|███▏ | 118004/371472 [9:23:32<18:45:03, 3.75it/s] 32%|███▏ | 118005/371472 [9:23:32<19:18:35, 3.65it/s] 32%|███▏ | 118006/371472 [9:23:33<18:43:35, 3.76it/s] 32%|███▏ | 118007/371472 [9:23:33<18:25:20, 3.82it/s] 32%|███▏ | 118008/371472 [9:23:33<20:26:38, 3.44it/s] 32%|███▏ | 118009/371472 [9:23:34<21:01:12, 3.35it/s] 32%|███▏ | 118010/371472 [9:23:34<21:16:17, 3.31it/s] 32%|███▏ | 118011/371472 [9:23:34<20:16:27, 3.47it/s] 32%|███▏ | 118012/371472 [9:23:34<19:37:05, 3.59it/s] 32%|███▏ | 118013/371472 [9:23:35<19:19:34, 3.64it/s] 32%|███▏ | 118014/371472 [9:23:35<18:55:03, 3.72it/s] 32%|███▏ | 118015/371472 [9:23:35<19:29:14, 3.61it/s] 32%|███▏ | 118016/371472 [9:23:36<20:41:36, 3.40it/s] 32%|███▏ | 118017/371472 [9:23:36<21:26:17, 3.28it/s] 32%|███▏ | 118018/371472 [9:23:36<20:58:39, 3.36it/s] 32%|███▏ | 118019/371472 [9:23:36<20:39:58, 3.41it/s] 32%|███▏ | 118020/371472 [9:23:37<20:23:22, 3.45it/s] {'loss': 3.4147, 'learning_rate': 7.143926824538343e-07, 'epoch': 5.08} + 32%|███▏ | 118020/371472 [9:23:37<20:23:22, 3.45it/s] 32%|███▏ | 118021/371472 [9:23:37<20:12:49, 3.48it/s] 32%|███▏ | 118022/371472 [9:23:37<22:42:40, 3.10it/s] 32%|███▏ | 118023/371472 [9:23:38<21:43:37, 3.24it/s] 32%|███▏ | 118024/371472 [9:23:38<24:20:26, 2.89it/s] 32%|███▏ | 118025/371472 [9:23:38<23:04:13, 3.05it/s] 32%|███▏ | 118026/371472 [9:23:39<24:24:01, 2.89it/s] 32%|███▏ | 118027/371472 [9:23:39<22:31:29, 3.13it/s] 32%|███▏ | 118028/371472 [9:23:39<21:12:01, 3.32it/s] 32%|███▏ | 118029/371472 [9:23:40<20:22:25, 3.46it/s] 32%|███▏ | 118030/371472 [9:23:40<19:57:55, 3.53it/s] 32%|███▏ | 118031/371472 [9:23:40<19:31:48, 3.60it/s] 32%|███▏ | 118032/371472 [9:23:40<20:43:49, 3.40it/s] 32%|███▏ | 118033/371472 [9:23:41<19:43:46, 3.57it/s] 32%|███▏ | 118034/371472 [9:23:41<21:40:05, 3.25it/s] 32%|███▏ | 118035/371472 [9:23:41<20:51:05, 3.38it/s] 32%|███▏ | 118036/371472 [9:23:42<20:40:48, 3.40it/s] 32%|███▏ | 118037/371472 [9:23:42<19:57:09, 3.53it/s] 32%|███▏ | 118038/371472 [9:23:42<19:19:39, 3.64it/s] 32%|███▏ | 118039/371472 [9:23:42<18:53:53, 3.73it/s] 32%|███▏ | 118040/371472 [9:23:43<20:13:11, 3.48it/s] {'loss': 3.372, 'learning_rate': 7.143442004783554e-07, 'epoch': 5.08} + 32%|███▏ | 118040/371472 [9:23:43<20:13:11, 3.48it/s] 32%|███▏ | 118041/371472 [9:23:43<20:50:48, 3.38it/s] 32%|███▏ | 118042/371472 [9:23:43<20:25:32, 3.45it/s] 32%|███▏ | 118043/371472 [9:23:44<20:22:44, 3.45it/s] 32%|███▏ | 118044/371472 [9:23:44<19:41:18, 3.58it/s] 32%|███▏ | 118045/371472 [9:23:44<19:27:59, 3.62it/s] 32%|███▏ | 118046/371472 [9:23:44<20:13:35, 3.48it/s] 32%|███▏ | 118047/371472 [9:23:45<20:22:43, 3.45it/s] 32%|███▏ | 118048/371472 [9:23:45<20:08:11, 3.50it/s] 32%|███▏ | 118049/371472 [9:23:45<20:33:10, 3.43it/s] 32%|███▏ | 118050/371472 [9:23:46<21:34:51, 3.26it/s] 32%|███▏ | 118051/371472 [9:23:46<21:00:18, 3.35it/s] 32%|███▏ | 118052/371472 [9:23:46<22:06:12, 3.18it/s] 32%|███▏ | 118053/371472 [9:23:47<21:20:06, 3.30it/s] 32%|███▏ | 118054/371472 [9:23:47<20:56:02, 3.36it/s] 32%|███▏ | 118055/371472 [9:23:47<20:38:11, 3.41it/s] 32%|███▏ | 118056/371472 [9:23:47<20:35:44, 3.42it/s] 32%|███▏ | 118057/371472 [9:23:48<20:39:37, 3.41it/s] 32%|███▏ | 118058/371472 [9:23:48<20:28:26, 3.44it/s] 32%|███▏ | 118059/371472 [9:23:48<20:45:55, 3.39it/s] 32%|███▏ | 118060/371472 [9:23:49<20:22:08, 3.46it/s] {'loss': 3.3439, 'learning_rate': 7.142957185028766e-07, 'epoch': 5.09} + 32%|███▏ | 118060/371472 [9:23:49<20:22:08, 3.46it/s] 32%|███▏ | 118061/371472 [9:23:49<20:51:22, 3.38it/s] 32%|███▏ | 118062/371472 [9:23:49<20:56:37, 3.36it/s] 32%|███▏ | 118063/371472 [9:23:50<20:41:30, 3.40it/s] 32%|███▏ | 118064/371472 [9:23:50<20:40:46, 3.40it/s] 32%|███▏ | 118065/371472 [9:23:50<20:23:36, 3.45it/s] 32%|███▏ | 118066/371472 [9:23:50<20:14:47, 3.48it/s] 32%|███▏ | 118067/371472 [9:23:51<21:31:47, 3.27it/s] 32%|███▏ | 118068/371472 [9:23:51<20:58:07, 3.36it/s] 32%|███▏ | 118069/371472 [9:23:51<21:33:18, 3.27it/s] 32%|███▏ | 118070/371472 [9:23:52<20:54:16, 3.37it/s] 32%|███▏ | 118071/371472 [9:23:52<22:02:21, 3.19it/s] 32%|███▏ | 118072/371472 [9:23:52<20:53:19, 3.37it/s] 32%|███▏ | 118073/371472 [9:23:52<20:06:43, 3.50it/s] 32%|███▏ | 118074/371472 [9:23:53<20:24:46, 3.45it/s] 32%|███▏ | 118075/371472 [9:23:53<21:09:28, 3.33it/s] 32%|███▏ | 118076/371472 [9:23:53<20:18:53, 3.46it/s] 32%|███▏ | 118077/371472 [9:23:54<19:47:25, 3.56it/s] 32%|███▏ | 118078/371472 [9:23:54<19:51:15, 3.55it/s] 32%|███▏ | 118079/371472 [9:23:54<19:10:02, 3.67it/s] 32%|███▏ | 118080/371472 [9:23:54<19:20:47, 3.64it/s] {'loss': 3.4245, 'learning_rate': 7.142472365273977e-07, 'epoch': 5.09} + 32%|███▏ | 118080/371472 [9:23:54<19:20:47, 3.64it/s] 32%|███▏ | 118081/371472 [9:23:55<19:26:15, 3.62it/s] 32%|███▏ | 118082/371472 [9:23:55<19:11:44, 3.67it/s] 32%|███▏ | 118083/371472 [9:23:55<19:50:38, 3.55it/s] 32%|███▏ | 118084/371472 [9:23:56<19:57:05, 3.53it/s] 32%|███▏ | 118085/371472 [9:23:56<21:27:54, 3.28it/s] 32%|███▏ | 118086/371472 [9:23:56<20:43:40, 3.40it/s] 32%|███▏ | 118087/371472 [9:23:56<20:27:56, 3.44it/s] 32%|███▏ | 118088/371472 [9:23:57<19:54:07, 3.54it/s] 32%|███▏ | 118089/371472 [9:23:57<19:43:32, 3.57it/s] 32%|███▏ | 118090/371472 [9:23:57<19:26:38, 3.62it/s] 32%|███▏ | 118091/371472 [9:23:58<20:34:59, 3.42it/s] 32%|███▏ | 118092/371472 [9:23:58<20:24:43, 3.45it/s] 32%|███▏ | 118093/371472 [9:23:58<20:27:18, 3.44it/s] 32%|███▏ | 118094/371472 [9:23:58<20:22:56, 3.45it/s] 32%|███▏ | 118095/371472 [9:23:59<20:04:08, 3.51it/s] 32%|███▏ | 118096/371472 [9:23:59<19:51:59, 3.54it/s] 32%|███▏ | 118097/371472 [9:23:59<19:23:13, 3.63it/s] 32%|███▏ | 118098/371472 [9:24:00<19:44:01, 3.57it/s] 32%|███▏ | 118099/371472 [9:24:00<20:15:12, 3.48it/s] 32%|███▏ | 118100/371472 [9:24:00<19:31:06, 3.61it/s] {'loss': 3.2875, 'learning_rate': 7.141987545519187e-07, 'epoch': 5.09} + 32%|███▏ | 118100/371472 [9:24:00<19:31:06, 3.61it/s] 32%|███▏ | 118101/371472 [9:24:00<19:06:57, 3.68it/s] 32%|███▏ | 118102/371472 [9:24:01<18:32:06, 3.80it/s] 32%|███▏ | 118103/371472 [9:24:01<18:12:31, 3.87it/s] 32%|███▏ | 118104/371472 [9:24:01<18:30:51, 3.80it/s] 32%|███▏ | 118105/371472 [9:24:01<19:15:02, 3.66it/s] 32%|███▏ | 118106/371472 [9:24:02<19:05:54, 3.69it/s] 32%|███▏ | 118107/371472 [9:24:02<18:33:41, 3.79it/s] 32%|███▏ | 118108/371472 [9:24:02<20:33:41, 3.42it/s] 32%|███▏ | 118109/371472 [9:24:03<22:00:13, 3.20it/s] 32%|███▏ | 118110/371472 [9:24:03<21:58:35, 3.20it/s] 32%|███▏ | 118111/371472 [9:24:03<21:24:01, 3.29it/s] 32%|███▏ | 118112/371472 [9:24:04<21:08:41, 3.33it/s] 32%|███▏ | 118113/371472 [9:24:04<21:24:46, 3.29it/s] 32%|███▏ | 118114/371472 [9:24:04<20:10:24, 3.49it/s] 32%|███▏ | 118115/371472 [9:24:04<20:44:32, 3.39it/s] 32%|███▏ | 118116/371472 [9:24:05<20:59:50, 3.35it/s] 32%|███▏ | 118117/371472 [9:24:05<19:43:07, 3.57it/s] 32%|███▏ | 118118/371472 [9:24:05<19:31:05, 3.61it/s] 32%|███▏ | 118119/371472 [9:24:06<20:14:32, 3.48it/s] 32%|███▏ | 118120/371472 [9:24:06<19:41:30, 3.57it/s] {'loss': 3.3443, 'learning_rate': 7.141502725764398e-07, 'epoch': 5.09} + 32%|███▏ | 118120/371472 [9:24:06<19:41:30, 3.57it/s] 32%|███▏ | 118121/371472 [9:24:06<20:40:00, 3.41it/s] 32%|███▏ | 118122/371472 [9:24:06<20:24:42, 3.45it/s] 32%|███▏ | 118123/371472 [9:24:07<21:51:02, 3.22it/s] 32%|███▏ | 118124/371472 [9:24:07<20:46:45, 3.39it/s] 32%|███▏ | 118125/371472 [9:24:07<19:44:23, 3.57it/s] 32%|███▏ | 118126/371472 [9:24:08<20:57:49, 3.36it/s] 32%|███▏ | 118127/371472 [9:24:08<20:58:59, 3.35it/s] 32%|███▏ | 118128/371472 [9:24:08<21:31:39, 3.27it/s] 32%|███▏ | 118129/371472 [9:24:09<21:16:49, 3.31it/s] 32%|███▏ | 118130/371472 [9:24:09<21:55:04, 3.21it/s] 32%|███▏ | 118131/371472 [9:24:09<21:14:42, 3.31it/s] 32%|███▏ | 118132/371472 [9:24:09<20:17:58, 3.47it/s] 32%|███▏ | 118133/371472 [9:24:10<19:53:53, 3.54it/s] 32%|███▏ | 118134/371472 [9:24:10<19:49:35, 3.55it/s] 32%|███▏ | 118135/371472 [9:24:10<19:49:26, 3.55it/s] 32%|███▏ | 118136/371472 [9:24:11<20:55:02, 3.36it/s] 32%|███▏ | 118137/371472 [9:24:11<21:11:17, 3.32it/s] 32%|███▏ | 118138/371472 [9:24:11<20:10:40, 3.49it/s] 32%|███▏ | 118139/371472 [9:24:11<19:48:30, 3.55it/s] 32%|███▏ | 118140/371472 [9:24:12<20:12:12, 3.48it/s] {'loss': 3.1295, 'learning_rate': 7.14101790600961e-07, 'epoch': 5.09} + 32%|███▏ | 118140/371472 [9:24:12<20:12:12, 3.48it/s] 32%|███▏ | 118141/371472 [9:24:12<19:20:11, 3.64it/s] 32%|███▏ | 118142/371472 [9:24:12<19:11:36, 3.67it/s] 32%|███▏ | 118143/371472 [9:24:13<19:47:13, 3.56it/s] 32%|███▏ | 118144/371472 [9:24:13<19:52:22, 3.54it/s] 32%|███▏ | 118145/371472 [9:24:13<19:37:32, 3.59it/s] 32%|███▏ | 118146/371472 [9:24:13<19:26:07, 3.62it/s] 32%|███▏ | 118147/371472 [9:24:14<19:10:05, 3.67it/s] 32%|███▏ | 118148/371472 [9:24:14<19:32:21, 3.60it/s] 32%|███▏ | 118149/371472 [9:24:14<22:19:54, 3.15it/s] 32%|███▏ | 118150/371472 [9:24:15<21:10:13, 3.32it/s] 32%|███▏ | 118151/371472 [9:24:15<23:19:32, 3.02it/s] 32%|███▏ | 118152/371472 [9:24:15<22:27:36, 3.13it/s] 32%|███▏ | 118153/371472 [9:24:16<21:41:17, 3.24it/s] 32%|███▏ | 118154/371472 [9:24:16<20:15:24, 3.47it/s] 32%|███▏ | 118155/371472 [9:24:16<20:25:59, 3.44it/s] 32%|███▏ | 118156/371472 [9:24:16<20:35:05, 3.42it/s] 32%|███▏ | 118157/371472 [9:24:17<20:34:44, 3.42it/s] 32%|███▏ | 118158/371472 [9:24:17<19:55:08, 3.53it/s] 32%|███▏ | 118159/371472 [9:24:17<20:53:40, 3.37it/s] 32%|███▏ | 118160/371472 [9:24:18<21:07:04, 3.33it/s] {'loss': 3.3251, 'learning_rate': 7.140533086254822e-07, 'epoch': 5.09} + 32%|███▏ | 118160/371472 [9:24:18<21:07:04, 3.33it/s] 32%|███▏ | 118161/371472 [9:24:18<21:04:44, 3.34it/s] 32%|███▏ | 118162/371472 [9:24:18<20:34:34, 3.42it/s] 32%|███▏ | 118163/371472 [9:24:18<20:25:37, 3.44it/s] 32%|███▏ | 118164/371472 [9:24:19<19:47:29, 3.56it/s] 32%|███▏ | 118165/371472 [9:24:19<19:37:15, 3.59it/s] 32%|███▏ | 118166/371472 [9:24:19<19:17:42, 3.65it/s] 32%|███▏ | 118167/371472 [9:24:20<20:31:57, 3.43it/s] 32%|███▏ | 118168/371472 [9:24:20<19:51:15, 3.54it/s] 32%|███▏ | 118169/371472 [9:24:20<20:33:18, 3.42it/s] 32%|███▏ | 118170/371472 [9:24:20<20:16:36, 3.47it/s] 32%|███▏ | 118171/371472 [9:24:21<20:04:04, 3.51it/s] 32%|███▏ | 118172/371472 [9:24:21<20:59:34, 3.35it/s] 32%|███▏ | 118173/371472 [9:24:21<20:39:40, 3.41it/s] 32%|███▏ | 118174/371472 [9:24:22<20:15:34, 3.47it/s] 32%|███▏ | 118175/371472 [9:24:22<19:42:45, 3.57it/s] 32%|███▏ | 118176/371472 [9:24:22<19:08:45, 3.67it/s] 32%|███▏ | 118177/371472 [9:24:22<19:55:25, 3.53it/s] 32%|███▏ | 118178/371472 [9:24:23<19:48:46, 3.55it/s] 32%|███▏ | 118179/371472 [9:24:23<19:43:00, 3.57it/s] 32%|███▏ | 118180/371472 [9:24:23<20:11:55, 3.48it/s] {'loss': 3.5161, 'learning_rate': 7.140048266500032e-07, 'epoch': 5.09} + 32%|███▏ | 118180/371472 [9:24:23<20:11:55, 3.48it/s] 32%|███▏ | 118181/371472 [9:24:24<20:32:38, 3.42it/s] 32%|███▏ | 118182/371472 [9:24:24<20:02:30, 3.51it/s] 32%|███▏ | 118183/371472 [9:24:24<19:50:20, 3.55it/s] 32%|███▏ | 118184/371472 [9:24:24<20:06:03, 3.50it/s] 32%|███▏ | 118185/371472 [9:24:25<19:56:30, 3.53it/s] 32%|███▏ | 118186/371472 [9:24:25<19:57:04, 3.53it/s] 32%|███▏ | 118187/371472 [9:24:25<20:29:05, 3.43it/s] 32%|███▏ | 118188/371472 [9:24:26<19:45:09, 3.56it/s] 32%|███▏ | 118189/371472 [9:24:26<19:03:18, 3.69it/s] 32%|███▏ | 118190/371472 [9:24:26<19:13:32, 3.66it/s] 32%|███▏ | 118191/371472 [9:24:26<19:22:44, 3.63it/s] 32%|███▏ | 118192/371472 [9:24:27<19:31:07, 3.60it/s] 32%|███▏ | 118193/371472 [9:24:27<18:33:19, 3.79it/s] 32%|███▏ | 118194/371472 [9:24:27<19:33:21, 3.60it/s] 32%|███▏ | 118195/371472 [9:24:27<19:18:52, 3.64it/s] 32%|███▏ | 118196/371472 [9:24:28<19:13:04, 3.66it/s] 32%|███▏ | 118197/371472 [9:24:28<19:13:55, 3.66it/s] 32%|███▏ | 118198/371472 [9:24:28<19:42:31, 3.57it/s] 32%|███▏ | 118199/371472 [9:24:29<21:32:09, 3.27it/s] 32%|███▏ | 118200/371472 [9:24:29<20:30:25, 3.43it/s] {'loss': 3.2926, 'learning_rate': 7.139563446745243e-07, 'epoch': 5.09} + 32%|███▏ | 118200/371472 [9:24:29<20:30:25, 3.43it/s] 32%|███▏ | 118201/371472 [9:24:29<20:35:55, 3.42it/s] 32%|███▏ | 118202/371472 [9:24:29<19:54:01, 3.54it/s] 32%|███▏ | 118203/371472 [9:24:30<19:19:59, 3.64it/s] 32%|███▏ | 118204/371472 [9:24:30<18:52:59, 3.73it/s] 32%|███▏ | 118205/371472 [9:24:30<19:57:59, 3.52it/s] 32%|███▏ | 118206/371472 [9:24:31<20:06:06, 3.50it/s] 32%|███▏ | 118207/371472 [9:24:31<19:27:10, 3.62it/s] 32%|███▏ | 118208/371472 [9:24:31<19:01:47, 3.70it/s] 32%|███▏ | 118209/371472 [9:24:31<19:57:01, 3.53it/s] 32%|███▏ | 118210/371472 [9:24:32<20:59:27, 3.35it/s] 32%|███▏ | 118211/371472 [9:24:32<19:42:23, 3.57it/s] 32%|███▏ | 118212/371472 [9:24:32<18:54:29, 3.72it/s] 32%|███▏ | 118213/371472 [9:24:32<18:36:18, 3.78it/s] 32%|███▏ | 118214/371472 [9:24:33<19:35:57, 3.59it/s] 32%|███▏ | 118215/371472 [9:24:33<19:46:54, 3.56it/s] 32%|███▏ | 118216/371472 [9:24:33<20:01:49, 3.51it/s] 32%|███▏ | 118217/371472 [9:24:34<19:35:37, 3.59it/s] 32%|███▏ | 118218/371472 [9:24:34<20:57:49, 3.36it/s] 32%|███▏ | 118219/371472 [9:24:34<20:09:04, 3.49it/s] 32%|███▏ | 118220/371472 [9:24:34<19:28:12, 3.61it/s] {'loss': 3.4842, 'learning_rate': 7.139078626990454e-07, 'epoch': 5.09} + 32%|███▏ | 118220/371472 [9:24:35<19:28:12, 3.61it/s] 32%|███▏ | 118221/371472 [9:24:35<18:50:28, 3.73it/s] 32%|███▏ | 118222/371472 [9:24:35<19:36:29, 3.59it/s] 32%|███▏ | 118223/371472 [9:24:35<19:55:22, 3.53it/s] 32%|███▏ | 118224/371472 [9:24:36<20:27:32, 3.44it/s] 32%|███▏ | 118225/371472 [9:24:36<20:34:16, 3.42it/s] 32%|███▏ | 118226/371472 [9:24:36<20:16:43, 3.47it/s] 32%|███▏ | 118227/371472 [9:24:37<20:42:01, 3.40it/s] 32%|███▏ | 118228/371472 [9:24:37<20:03:33, 3.51it/s] 32%|███▏ | 118229/371472 [9:24:37<19:42:36, 3.57it/s] 32%|███▏ | 118230/371472 [9:24:37<20:31:04, 3.43it/s] 32%|███▏ | 118231/371472 [9:24:38<19:43:02, 3.57it/s] 32%|███▏ | 118232/371472 [9:24:38<19:35:55, 3.59it/s] 32%|███▏ | 118233/371472 [9:24:38<19:04:23, 3.69it/s] 32%|███▏ | 118234/371472 [9:24:38<19:04:37, 3.69it/s] 32%|███▏ | 118235/371472 [9:24:39<19:07:54, 3.68it/s] 32%|███▏ | 118236/371472 [9:24:39<18:37:30, 3.78it/s] 32%|███▏ | 118237/371472 [9:24:39<18:43:43, 3.76it/s] 32%|███▏ | 118238/371472 [9:24:40<18:50:20, 3.73it/s] 32%|███▏ | 118239/371472 [9:24:40<18:29:55, 3.80it/s] 32%|███▏ | 118240/371472 [9:24:40<18:40:42, 3.77it/s] {'loss': 3.2159, 'learning_rate': 7.138593807235666e-07, 'epoch': 5.09} + 32%|███▏ | 118240/371472 [9:24:40<18:40:42, 3.77it/s] 32%|███▏ | 118241/371472 [9:24:40<18:22:39, 3.83it/s] 32%|███▏ | 118242/371472 [9:24:41<18:32:10, 3.79it/s] 32%|███▏ | 118243/371472 [9:24:41<18:33:37, 3.79it/s] 32%|███▏ | 118244/371472 [9:24:41<18:15:35, 3.85it/s] 32%|███▏ | 118245/371472 [9:24:41<19:08:42, 3.67it/s] 32%|███▏ | 118246/371472 [9:24:42<19:04:19, 3.69it/s] 32%|███▏ | 118247/371472 [9:24:42<19:16:29, 3.65it/s] 32%|███▏ | 118248/371472 [9:24:42<19:27:23, 3.62it/s] 32%|███▏ | 118249/371472 [9:24:43<21:00:58, 3.35it/s] 32%|███▏ | 118250/371472 [9:24:43<19:54:44, 3.53it/s] 32%|███▏ | 118251/371472 [9:24:43<20:48:13, 3.38it/s] 32%|███▏ | 118252/371472 [9:24:43<20:01:00, 3.51it/s] 32%|███▏ | 118253/371472 [9:24:44<20:01:20, 3.51it/s] 32%|███▏ | 118254/371472 [9:24:44<19:43:21, 3.57it/s] 32%|███▏ | 118255/371472 [9:24:44<19:35:13, 3.59it/s] 32%|███▏ | 118256/371472 [9:24:44<19:52:41, 3.54it/s] 32%|███▏ | 118257/371472 [9:24:45<19:42:51, 3.57it/s] 32%|███▏ | 118258/371472 [9:24:45<20:09:18, 3.49it/s] 32%|███▏ | 118259/371472 [9:24:45<20:47:07, 3.38it/s] 32%|███▏ | 118260/371472 [9:24:46<20:10:14, 3.49it/s] {'loss': 3.2224, 'learning_rate': 7.138108987480876e-07, 'epoch': 5.09} + 32%|███▏ | 118260/371472 [9:24:46<20:10:14, 3.49it/s] 32%|███▏ | 118261/371472 [9:24:46<20:42:46, 3.40it/s] 32%|███▏ | 118262/371472 [9:24:46<20:27:01, 3.44it/s] 32%|███▏ | 118263/371472 [9:24:47<20:32:14, 3.42it/s] 32%|███▏ | 118264/371472 [9:24:47<21:03:56, 3.34it/s] 32%|███▏ | 118265/371472 [9:24:47<20:39:06, 3.41it/s] 32%|███▏ | 118266/371472 [9:24:47<19:52:14, 3.54it/s] 32%|███▏ | 118267/371472 [9:24:48<19:14:20, 3.66it/s] 32%|███▏ | 118268/371472 [9:24:48<20:07:26, 3.50it/s] 32%|███▏ | 118269/371472 [9:24:48<20:24:33, 3.45it/s] 32%|███▏ | 118270/371472 [9:24:49<20:21:19, 3.46it/s] 32%|███▏ | 118271/371472 [9:24:49<19:38:05, 3.58it/s] 32%|███▏ | 118272/371472 [9:24:49<21:29:38, 3.27it/s] 32%|███▏ | 118273/371472 [9:24:49<20:59:27, 3.35it/s] 32%|███▏ | 118274/371472 [9:24:50<20:59:47, 3.35it/s] 32%|███▏ | 118275/371472 [9:24:50<20:30:21, 3.43it/s] 32%|███▏ | 118276/371472 [9:24:50<19:39:55, 3.58it/s] 32%|███▏ | 118277/371472 [9:24:51<19:47:28, 3.55it/s] 32%|███��� | 118278/371472 [9:24:51<19:45:06, 3.56it/s] 32%|███▏ | 118279/371472 [9:24:51<19:52:33, 3.54it/s] 32%|███▏ | 118280/371472 [9:24:51<19:18:47, 3.64it/s] {'loss': 3.3001, 'learning_rate': 7.137624167726087e-07, 'epoch': 5.09} + 32%|███▏ | 118280/371472 [9:24:51<19:18:47, 3.64it/s] 32%|███▏ | 118281/371472 [9:24:52<19:10:58, 3.67it/s] 32%|███▏ | 118282/371472 [9:24:52<18:57:45, 3.71it/s] 32%|███▏ | 118283/371472 [9:24:52<19:19:34, 3.64it/s] 32%|███▏ | 118284/371472 [9:24:53<20:34:24, 3.42it/s] 32%|███▏ | 118285/371472 [9:24:53<21:14:23, 3.31it/s] 32%|███▏ | 118286/371472 [9:24:53<20:27:38, 3.44it/s] 32%|███▏ | 118287/371472 [9:24:53<20:20:05, 3.46it/s] 32%|███▏ | 118288/371472 [9:24:54<19:56:16, 3.53it/s] 32%|███▏ | 118289/371472 [9:24:54<20:21:36, 3.45it/s] 32%|███▏ | 118290/371472 [9:24:54<20:26:57, 3.44it/s] 32%|███▏ | 118291/371472 [9:24:55<22:10:29, 3.17it/s] 32%|███▏ | 118292/371472 [9:24:55<22:24:30, 3.14it/s] 32%|███▏ | 118293/371472 [9:24:55<22:20:04, 3.15it/s] 32%|███▏ | 118294/371472 [9:24:56<21:03:26, 3.34it/s] 32%|███▏ | 118295/371472 [9:24:56<20:07:26, 3.49it/s] 32%|███▏ | 118296/371472 [9:24:56<19:37:19, 3.58it/s] 32%|███▏ | 118297/371472 [9:24:56<19:46:59, 3.55it/s] 32%|███▏ | 118298/371472 [9:24:57<20:58:20, 3.35it/s] 32%|███▏ | 118299/371472 [9:24:57<20:21:22, 3.45it/s] 32%|███▏ | 118300/371472 [9:24:57<19:44:36, 3.56it/s] {'loss': 3.1671, 'learning_rate': 7.137139347971299e-07, 'epoch': 5.1} + 32%|███▏ | 118300/371472 [9:24:57<19:44:36, 3.56it/s] 32%|███▏ | 118301/371472 [9:24:57<19:28:41, 3.61it/s] 32%|███▏ | 118302/371472 [9:24:58<19:56:48, 3.53it/s] 32%|███▏ | 118303/371472 [9:24:58<20:22:39, 3.45it/s] 32%|███▏ | 118304/371472 [9:24:58<21:10:14, 3.32it/s] 32%|███▏ | 118305/371472 [9:24:59<20:57:45, 3.35it/s] 32%|███▏ | 118306/371472 [9:24:59<20:40:48, 3.40it/s] 32%|███▏ | 118307/371472 [9:24:59<20:22:21, 3.45it/s] 32%|███▏ | 118308/371472 [9:25:00<20:08:45, 3.49it/s] 32%|███▏ | 118309/371472 [9:25:00<19:32:16, 3.60it/s] 32%|███▏ | 118310/371472 [9:25:00<19:06:50, 3.68it/s] 32%|███▏ | 118311/371472 [9:25:00<19:48:36, 3.55it/s] 32%|███▏ | 118312/371472 [9:25:01<19:24:24, 3.62it/s] 32%|███▏ | 118313/371472 [9:25:01<19:30:20, 3.61it/s] 32%|███▏ | 118314/371472 [9:25:01<19:26:04, 3.62it/s] 32%|███▏ | 118315/371472 [9:25:01<18:58:24, 3.71it/s] 32%|███▏ | 118316/371472 [9:25:02<18:40:46, 3.76it/s] 32%|███▏ | 118317/371472 [9:25:02<18:56:58, 3.71it/s] 32%|███▏ | 118318/371472 [9:25:02<19:32:32, 3.60it/s] 32%|███▏ | 118319/371472 [9:25:03<20:02:58, 3.51it/s] 32%|███▏ | 118320/371472 [9:25:03<20:16:41, 3.47it/s] {'loss': 3.2863, 'learning_rate': 7.13665452821651e-07, 'epoch': 5.1} + 32%|███▏ | 118320/371472 [9:25:03<20:16:41, 3.47it/s] 32%|███▏ | 118321/371472 [9:25:03<19:55:38, 3.53it/s] 32%|███▏ | 118322/371472 [9:25:03<19:41:08, 3.57it/s] 32%|███▏ | 118323/371472 [9:25:04<20:00:10, 3.52it/s] 32%|███▏ | 118324/371472 [9:25:04<21:10:38, 3.32it/s] 32%|███▏ | 118325/371472 [9:25:04<20:44:06, 3.39it/s] 32%|███▏ | 118326/371472 [9:25:05<20:13:22, 3.48it/s] 32%|███▏ | 118327/371472 [9:25:05<19:24:28, 3.62it/s] 32%|███▏ | 118328/371472 [9:25:05<18:59:53, 3.70it/s] 32%|███▏ | 118329/371472 [9:25:05<19:03:21, 3.69it/s] 32%|███▏ | 118330/371472 [9:25:06<19:03:18, 3.69it/s] 32%|███▏ | 118331/371472 [9:25:06<19:03:37, 3.69it/s] 32%|███▏ | 118332/371472 [9:25:06<20:23:18, 3.45it/s] 32%|███▏ | 118333/371472 [9:25:07<19:52:01, 3.54it/s] 32%|███▏ | 118334/371472 [9:25:07<19:53:17, 3.54it/s] 32%|███▏ | 118335/371472 [9:25:07<19:57:56, 3.52it/s] 32%|███▏ | 118336/371472 [9:25:07<19:25:49, 3.62it/s] 32%|███▏ | 118337/371472 [9:25:08<20:14:26, 3.47it/s] 32%|███▏ | 118338/371472 [9:25:08<19:27:56, 3.61it/s] 32%|███▏ | 118339/371472 [9:25:08<20:18:04, 3.46it/s] 32%|███▏ | 118340/371472 [9:25:09<20:40:34, 3.40it/s] {'loss': 3.4125, 'learning_rate': 7.13616970846172e-07, 'epoch': 5.1} + 32%|███▏ | 118340/371472 [9:25:09<20:40:34, 3.40it/s] 32%|███▏ | 118341/371472 [9:25:09<20:17:55, 3.46it/s] 32%|███▏ | 118342/371472 [9:25:09<19:25:03, 3.62it/s] 32%|███▏ | 118343/371472 [9:25:09<20:41:45, 3.40it/s] 32%|███▏ | 118344/371472 [9:25:10<20:42:54, 3.39it/s] 32%|███▏ | 118345/371472 [9:25:10<20:23:03, 3.45it/s] 32%|███▏ | 118346/371472 [9:25:10<20:08:44, 3.49it/s] 32%|███▏ | 118347/371472 [9:25:11<19:44:16, 3.56it/s] 32%|███▏ | 118348/371472 [9:25:11<19:22:43, 3.63it/s] 32%|███▏ | 118349/371472 [9:25:11<20:27:28, 3.44it/s] 32%|███▏ | 118350/371472 [9:25:11<20:05:43, 3.50it/s] 32%|███▏ | 118351/371472 [9:25:12<19:37:30, 3.58it/s] 32%|███▏ | 118352/371472 [9:25:12<20:03:43, 3.50it/s] 32%|███▏ | 118353/371472 [9:25:12<19:30:45, 3.60it/s] 32%|███▏ | 118354/371472 [9:25:13<21:11:13, 3.32it/s] 32%|███▏ | 118355/371472 [9:25:13<20:27:20, 3.44it/s] 32%|███▏ | 118356/371472 [9:25:13<19:37:13, 3.58it/s] 32%|███▏ | 118357/371472 [9:25:13<20:42:15, 3.40it/s] 32%|███▏ | 118358/371472 [9:25:14<20:54:50, 3.36it/s] 32%|███▏ | 118359/371472 [9:25:14<20:15:09, 3.47it/s] 32%|███▏ | 118360/371472 [9:25:14<20:00:45, 3.51it/s] {'loss': 3.1658, 'learning_rate': 7.135684888706931e-07, 'epoch': 5.1} + 32%|███▏ | 118360/371472 [9:25:14<20:00:45, 3.51it/s] 32%|███▏ | 118361/371472 [9:25:15<20:21:17, 3.45it/s] 32%|███▏ | 118362/371472 [9:25:15<21:00:16, 3.35it/s] 32%|███▏ | 118363/371472 [9:25:15<20:10:29, 3.48it/s] 32%|███▏ | 118364/371472 [9:25:15<19:46:33, 3.56it/s] 32%|███▏ | 118365/371472 [9:25:16<19:19:58, 3.64it/s] 32%|███▏ | 118366/371472 [9:25:16<18:35:30, 3.78it/s] 32%|███▏ | 118367/371472 [9:25:16<18:38:44, 3.77it/s] 32%|███▏ | 118368/371472 [9:25:16<18:24:06, 3.82it/s] 32%|███▏ | 118369/371472 [9:25:17<19:08:07, 3.67it/s] 32%|███▏ | 118370/371472 [9:25:17<18:28:05, 3.81it/s] 32%|███▏ | 118371/371472 [9:25:17<18:34:06, 3.79it/s] 32%|███▏ | 118372/371472 [9:25:17<18:15:46, 3.85it/s] 32%|███▏ | 118373/371472 [9:25:18<19:46:12, 3.56it/s] 32%|███▏ | 118374/371472 [9:25:18<19:29:02, 3.61it/s] 32%|███▏ | 118375/371472 [9:25:18<19:35:51, 3.59it/s] 32%|███▏ | 118376/371472 [9:25:19<19:48:55, 3.55it/s] 32%|███▏ | 118377/371472 [9:25:19<19:28:24, 3.61it/s] 32%|███▏ | 118378/371472 [9:25:19<20:34:25, 3.42it/s] 32%|███▏ | 118379/371472 [9:25:20<20:00:10, 3.51it/s] 32%|███▏ | 118380/371472 [9:25:20<19:52:12, 3.54it/s] {'loss': 3.4096, 'learning_rate': 7.135200068952143e-07, 'epoch': 5.1} + 32%|███▏ | 118380/371472 [9:25:20<19:52:12, 3.54it/s] 32%|███▏ | 118381/371472 [9:25:20<20:04:33, 3.50it/s] 32%|███▏ | 118382/371472 [9:25:20<19:45:19, 3.56it/s] 32%|███▏ | 118383/371472 [9:25:21<20:01:04, 3.51it/s] 32%|███▏ | 118384/371472 [9:25:21<19:41:06, 3.57it/s] 32%|███▏ | 118385/371472 [9:25:21<19:29:10, 3.61it/s] 32%|███▏ | 118386/371472 [9:25:21<19:44:45, 3.56it/s] 32%|███▏ | 118387/371472 [9:25:22<19:30:42, 3.60it/s] 32%|███▏ | 118388/371472 [9:25:22<19:58:30, 3.52it/s] 32%|███▏ | 118389/371472 [9:25:22<21:36:35, 3.25it/s] 32%|███▏ | 118390/371472 [9:25:23<20:30:06, 3.43it/s] 32%|███▏ | 118391/371472 [9:25:23<20:59:31, 3.35it/s] 32%|███▏ | 118392/371472 [9:25:23<20:44:08, 3.39it/s] 32%|███▏ | 118393/371472 [9:25:24<20:03:51, 3.50it/s] 32%|███▏ | 118394/371472 [9:25:24<20:58:14, 3.35it/s] 32%|███▏ | 118395/371472 [9:25:24<21:38:12, 3.25it/s] 32%|███▏ | 118396/371472 [9:25:24<20:43:44, 3.39it/s] 32%|███▏ | 118397/371472 [9:25:25<20:24:37, 3.44it/s] 32%|███▏ | 118398/371472 [9:25:25<20:25:06, 3.44it/s] 32%|███▏ | 118399/371472 [9:25:25<20:04:20, 3.50it/s] 32%|███▏ | 118400/371472 [9:25:26<22:01:19, 3.19it/s] {'loss': 3.1757, 'learning_rate': 7.134715249197353e-07, 'epoch': 5.1} + 32%|███▏ | 118400/371472 [9:25:26<22:01:19, 3.19it/s] 32%|███▏ | 118401/371472 [9:25:26<21:15:26, 3.31it/s] 32%|███▏ | 118402/371472 [9:25:26<22:03:03, 3.19it/s] 32%|███▏ | 118403/371472 [9:25:27<21:00:50, 3.35it/s] 32%|███▏ | 118404/371472 [9:25:27<21:15:41, 3.31it/s] 32%|███▏ | 118405/371472 [9:25:27<20:40:07, 3.40it/s] 32%|███▏ | 118406/371472 [9:25:27<21:01:42, 3.34it/s] 32%|███▏ | 118407/371472 [9:25:28<21:21:20, 3.29it/s] 32%|███▏ | 118408/371472 [9:25:28<20:27:44, 3.44it/s] 32%|███▏ | 118409/371472 [9:25:28<20:33:02, 3.42it/s] 32%|███▏ | 118410/371472 [9:25:29<20:34:13, 3.42it/s] 32%|███▏ | 118411/371472 [9:25:29<20:17:37, 3.46it/s] 32%|███▏ | 118412/371472 [9:25:29<19:53:10, 3.53it/s] 32%|███▏ | 118413/371472 [9:25:30<21:45:52, 3.23it/s] 32%|███▏ | 118414/371472 [9:25:30<20:39:06, 3.40it/s] 32%|███▏ | 118415/371472 [9:25:30<20:19:30, 3.46it/s] 32%|███▏ | 118416/371472 [9:25:30<19:57:18, 3.52it/s] 32%|███▏ | 118417/371472 [9:25:31<19:51:15, 3.54it/s] 32%|███▏ | 118418/371472 [9:25:31<20:03:24, 3.50it/s] 32%|███▏ | 118419/371472 [9:25:31<20:16:25, 3.47it/s] 32%|███▏ | 118420/371472 [9:25:31<20:02:38, 3.51it/s] {'loss': 3.4005, 'learning_rate': 7.134230429442565e-07, 'epoch': 5.1} + 32%|███▏ | 118420/371472 [9:25:31<20:02:38, 3.51it/s] 32%|███▏ | 118421/371472 [9:25:32<19:25:51, 3.62it/s] 32%|███▏ | 118422/371472 [9:25:32<19:06:33, 3.68it/s] 32%|███▏ | 118423/371472 [9:25:32<19:41:33, 3.57it/s] 32%|███▏ | 118424/371472 [9:25:33<19:06:06, 3.68it/s] 32%|███▏ | 118425/371472 [9:25:33<20:06:41, 3.50it/s] 32%|███▏ | 118426/371472 [9:25:33<20:14:01, 3.47it/s] 32%|███▏ | 118427/371472 [9:25:33<20:03:32, 3.50it/s] 32%|███▏ | 118428/371472 [9:25:34<19:36:38, 3.58it/s] 32%|███▏ | 118429/371472 [9:25:34<18:57:37, 3.71it/s] 32%|███▏ | 118430/371472 [9:25:34<20:40:14, 3.40it/s] 32%|███▏ | 118431/371472 [9:25:35<21:21:59, 3.29it/s] 32%|███▏ | 118432/371472 [9:25:35<20:07:54, 3.49it/s] 32%|███▏ | 118433/371472 [9:25:35<20:02:25, 3.51it/s] 32%|███▏ | 118434/371472 [9:25:35<20:01:18, 3.51it/s] 32%|███▏ | 118435/371472 [9:25:36<19:48:04, 3.55it/s] 32%|███▏ | 118436/371472 [9:25:36<18:57:25, 3.71it/s] 32%|███▏ | 118437/371472 [9:25:36<18:25:54, 3.81it/s] 32%|███▏ | 118438/371472 [9:25:37<20:24:08, 3.45it/s] 32%|███▏ | 118439/371472 [9:25:37<19:32:09, 3.60it/s] 32%|███▏ | 118440/371472 [9:25:37<19:08:36, 3.67it/s] {'loss': 3.4536, 'learning_rate': 7.133745609687776e-07, 'epoch': 5.1} + 32%|███▏ | 118440/371472 [9:25:37<19:08:36, 3.67it/s] 32%|███▏ | 118441/371472 [9:25:37<19:57:33, 3.52it/s] 32%|███▏ | 118442/371472 [9:25:38<20:06:48, 3.49it/s] 32%|███▏ | 118443/371472 [9:25:38<19:49:03, 3.55it/s] 32%|███▏ | 118444/371472 [9:25:38<20:48:04, 3.38it/s] 32%|███▏ | 118445/371472 [9:25:39<20:54:19, 3.36it/s] 32%|███▏ | 118446/371472 [9:25:39<20:45:40, 3.39it/s] 32%|███▏ | 118447/371472 [9:25:39<21:56:59, 3.20it/s] 32%|███▏ | 118448/371472 [9:25:39<20:35:59, 3.41it/s] 32%|███▏ | 118449/371472 [9:25:40<21:14:02, 3.31it/s] 32%|███▏ | 118450/371472 [9:25:40<20:55:05, 3.36it/s] 32%|███▏ | 118451/371472 [9:25:40<20:19:50, 3.46it/s] 32%|███▏ | 118452/371472 [9:25:41<19:31:41, 3.60it/s] 32%|███▏ | 118453/371472 [9:25:41<19:13:14, 3.66it/s] 32%|███▏ | 118454/371472 [9:25:41<18:40:54, 3.76it/s] 32%|███▏ | 118455/371472 [9:25:42<21:27:37, 3.27it/s] 32%|███▏ | 118456/371472 [9:25:42<20:47:46, 3.38it/s] 32%|███▏ | 118457/371472 [9:25:42<19:56:00, 3.53it/s] 32%|███▏ | 118458/371472 [9:25:42<19:33:25, 3.59it/s] 32%|███▏ | 118459/371472 [9:25:43<18:57:55, 3.71it/s] 32%|███▏ | 118460/371472 [9:25:43<18:59:47, 3.70it/s] {'loss': 3.3017, 'learning_rate': 7.133260789932987e-07, 'epoch': 5.1} + 32%|███▏ | 118460/371472 [9:25:43<18:59:47, 3.70it/s] 32%|███▏ | 118461/371472 [9:25:43<19:14:19, 3.65it/s] 32%|███▏ | 118462/371472 [9:25:43<19:21:47, 3.63it/s] 32%|███▏ | 118463/371472 [9:25:44<20:15:59, 3.47it/s] 32%|███▏ | 118464/371472 [9:25:44<20:40:43, 3.40it/s] 32%|███▏ | 118465/371472 [9:25:44<20:45:56, 3.38it/s] 32%|███▏ | 118466/371472 [9:25:45<20:05:43, 3.50it/s] 32%|███▏ | 118467/371472 [9:25:45<19:12:53, 3.66it/s] 32%|███▏ | 118468/371472 [9:25:45<19:11:26, 3.66it/s] 32%|███▏ | 118469/371472 [9:25:45<19:06:46, 3.68it/s] 32%|███▏ | 118470/371472 [9:25:46<19:18:51, 3.64it/s] 32%|███▏ | 118471/371472 [9:25:46<20:18:38, 3.46it/s] 32%|███▏ | 118472/371472 [9:25:46<19:58:22, 3.52it/s] 32%|███▏ | 118473/371472 [9:25:47<20:10:34, 3.48it/s] 32%|███▏ | 118474/371472 [9:25:47<19:54:44, 3.53it/s] 32%|███▏ | 118475/371472 [9:25:47<19:46:03, 3.56it/s] 32%|███▏ | 118476/371472 [9:25:47<19:40:47, 3.57it/s] 32%|███▏ | 118477/371472 [9:25:48<19:23:46, 3.62it/s] 32%|███▏ | 118478/371472 [9:25:48<19:10:13, 3.67it/s] 32%|███▏ | 118479/371472 [9:25:48<18:52:23, 3.72it/s] 32%|███▏ | 118480/371472 [9:25:48<18:45:03, 3.75it/s] {'loss': 3.2372, 'learning_rate': 7.132775970178197e-07, 'epoch': 5.1} + 32%|███▏ | 118480/371472 [9:25:48<18:45:03, 3.75it/s] 32%|███▏ | 118481/371472 [9:25:49<20:27:11, 3.44it/s] 32%|███▏ | 118482/371472 [9:25:49<21:19:14, 3.30it/s] 32%|███▏ | 118483/371472 [9:25:49<21:03:25, 3.34it/s] 32%|███▏ | 118484/371472 [9:25:50<21:41:44, 3.24it/s] 32%|███▏ | 118485/371472 [9:25:50<21:06:55, 3.33it/s] 32%|███▏ | 118486/371472 [9:25:50<21:15:01, 3.31it/s] 32%|███▏ | 118487/371472 [9:25:51<20:26:29, 3.44it/s] 32%|███▏ | 118488/371472 [9:25:51<20:07:25, 3.49it/s] 32%|███▏ | 118489/371472 [9:25:51<21:16:15, 3.30it/s] 32%|███▏ | 118490/371472 [9:25:51<21:05:51, 3.33it/s] 32%|███▏ | 118491/371472 [9:25:52<20:20:16, 3.46it/s] 32%|███▏ | 118492/371472 [9:25:52<21:14:22, 3.31it/s] 32%|███▏ | 118493/371472 [9:25:52<20:46:34, 3.38it/s] 32%|███▏ | 118494/371472 [9:25:53<21:00:47, 3.34it/s] 32%|███▏ | 118495/371472 [9:25:53<20:55:34, 3.36it/s] 32%|███▏ | 118496/371472 [9:25:53<21:42:57, 3.24it/s] 32%|███▏ | 118497/371472 [9:25:54<20:35:30, 3.41it/s] 32%|███▏ | 118498/371472 [9:25:54<21:05:05, 3.33it/s] 32%|███▏ | 118499/371472 [9:25:54<19:50:11, 3.54it/s] 32%|███▏ | 118500/371472 [9:25:54<19:50:21, 3.54it/s] {'loss': 3.1772, 'learning_rate': 7.132291150423408e-07, 'epoch': 5.1} + 32%|███▏ | 118500/371472 [9:25:54<19:50:21, 3.54it/s] 32%|███▏ | 118501/371472 [9:25:55<20:27:31, 3.43it/s] 32%|███▏ | 118502/371472 [9:25:55<19:53:12, 3.53it/s] 32%|███▏ | 118503/371472 [9:25:55<20:01:16, 3.51it/s] 32%|███▏ | 118504/371472 [9:25:56<20:39:44, 3.40it/s] 32%|███▏ | 118505/371472 [9:25:56<20:17:39, 3.46it/s] 32%|███▏ | 118506/371472 [9:25:56<19:29:43, 3.60it/s] 32%|███▏ | 118507/371472 [9:25:56<19:05:30, 3.68it/s] 32%|███▏ | 118508/371472 [9:25:57<18:51:50, 3.72it/s] 32%|███▏ | 118509/371472 [9:25:57<21:11:59, 3.31it/s] 32%|███▏ | 118510/371472 [9:25:57<20:47:01, 3.38it/s] 32%|███▏ | 118511/371472 [9:25:58<23:01:00, 3.05it/s] 32%|███▏ | 118512/371472 [9:25:58<21:39:01, 3.25it/s] 32%|███▏ | 118513/371472 [9:25:58<21:01:22, 3.34it/s] 32%|███▏ | 118514/371472 [9:25:59<21:56:06, 3.20it/s] 32%|███▏ | 118515/371472 [9:25:59<21:20:19, 3.29it/s] 32%|███▏ | 118516/371472 [9:25:59<21:53:20, 3.21it/s] 32%|███▏ | 118517/371472 [9:25:59<20:56:15, 3.36it/s] 32%|███▏ | 118518/371472 [9:26:00<20:11:30, 3.48it/s] 32%|███▏ | 118519/371472 [9:26:00<19:39:44, 3.57it/s] 32%|███▏ | 118520/371472 [9:26:00<19:42:19, 3.57it/s] {'loss': 3.141, 'learning_rate': 7.13180633066862e-07, 'epoch': 5.1} + 32%|███▏ | 118520/371472 [9:26:00<19:42:19, 3.57it/s] 32%|███▏ | 118521/371472 [9:26:01<20:09:25, 3.49it/s] 32%|███▏ | 118522/371472 [9:26:01<19:30:38, 3.60it/s] 32%|███▏ | 118523/371472 [9:26:01<19:09:57, 3.67it/s] 32%|███▏ | 118524/371472 [9:26:01<18:43:06, 3.75it/s] 32%|███▏ | 118525/371472 [9:26:02<21:23:37, 3.28it/s] 32%|███▏ | 118526/371472 [9:26:02<22:00:16, 3.19it/s] 32%|███▏ | 118527/371472 [9:26:02<21:07:45, 3.33it/s] 32%|███▏ | 118528/371472 [9:26:03<20:35:24, 3.41it/s] 32%|███▏ | 118529/371472 [9:26:03<19:28:01, 3.61it/s] 32%|███▏ | 118530/371472 [9:26:03<19:10:43, 3.66it/s] 32%|███▏ | 118531/371472 [9:26:03<19:28:53, 3.61it/s] 32%|███▏ | 118532/371472 [9:26:04<19:03:27, 3.69it/s] 32%|███▏ | 118533/371472 [9:26:04<19:51:55, 3.54it/s] 32%|███▏ | 118534/371472 [9:26:04<21:26:39, 3.28it/s] 32%|███▏ | 118535/371472 [9:26:05<21:31:15, 3.26it/s] 32%|███▏ | 118536/371472 [9:26:05<20:17:00, 3.46it/s] 32%|███▏ | 118537/371472 [9:26:05<20:48:10, 3.38it/s] 32%|███▏ | 118538/371472 [9:26:05<20:56:32, 3.35it/s] 32%|███▏ | 118539/371472 [9:26:06<21:01:56, 3.34it/s] 32%|███▏ | 118540/371472 [9:26:06<20:21:37, 3.45it/s] {'loss': 3.2798, 'learning_rate': 7.131321510913832e-07, 'epoch': 5.11} + 32%|███▏ | 118540/371472 [9:26:06<20:21:37, 3.45it/s] 32%|███▏ | 118541/371472 [9:26:06<20:50:24, 3.37it/s] 32%|███▏ | 118542/371472 [9:26:07<20:29:00, 3.43it/s] 32%|███▏ | 118543/371472 [9:26:07<20:43:57, 3.39it/s] 32%|███▏ | 118544/371472 [9:26:07<20:40:33, 3.40it/s] 32%|███▏ | 118545/371472 [9:26:08<20:02:29, 3.51it/s] 32%|███▏ | 118546/371472 [9:26:08<19:54:00, 3.53it/s] 32%|███▏ | 118547/371472 [9:26:08<19:38:25, 3.58it/s] 32%|███▏ | 118548/371472 [9:26:08<20:00:07, 3.51it/s] 32%|███▏ | 118549/371472 [9:26:09<19:42:28, 3.56it/s] 32%|███▏ | 118550/371472 [9:26:09<20:17:09, 3.46it/s] 32%|███▏ | 118551/371472 [9:26:09<19:38:16, 3.58it/s] 32%|███▏ | 118552/371472 [9:26:09<19:48:05, 3.55it/s] 32%|███▏ | 118553/371472 [9:26:10<19:26:05, 3.61it/s] 32%|███▏ | 118554/371472 [9:26:10<19:58:08, 3.52it/s] 32%|███▏ | 118555/371472 [9:26:10<19:45:41, 3.56it/s] 32%|███▏ | 118556/371472 [9:26:11<20:06:41, 3.49it/s] 32%|███▏ | 118557/371472 [9:26:11<20:08:13, 3.49it/s] 32%|███▏ | 118558/371472 [9:26:11<19:55:29, 3.53it/s] 32%|███▏ | 118559/371472 [9:26:11<20:17:10, 3.46it/s] 32%|███▏ | 118560/371472 [9:26:12<20:04:46, 3.50it/s] {'loss': 3.2898, 'learning_rate': 7.130836691159042e-07, 'epoch': 5.11} + 32%|███▏ | 118560/371472 [9:26:12<20:04:46, 3.50it/s] 32%|███▏ | 118561/371472 [9:26:12<19:43:32, 3.56it/s] 32%|███▏ | 118562/371472 [9:26:12<19:30:57, 3.60it/s] 32%|███▏ | 118563/371472 [9:26:13<20:03:13, 3.50it/s] 32%|███▏ | 118564/371472 [9:26:13<20:32:17, 3.42it/s] 32%|███▏ | 118565/371472 [9:26:13<22:30:19, 3.12it/s] 32%|███▏ | 118566/371472 [9:26:14<21:22:27, 3.29it/s] 32%|███▏ | 118567/371472 [9:26:14<20:37:39, 3.41it/s] 32%|███▏ | 118568/371472 [9:26:14<21:31:03, 3.26it/s] 32%|███▏ | 118569/371472 [9:26:15<23:00:10, 3.05it/s] 32%|███▏ | 118570/371472 [9:26:15<22:19:21, 3.15it/s] 32%|███▏ | 118571/371472 [9:26:15<20:51:15, 3.37it/s] 32%|███▏ | 118572/371472 [9:26:15<20:23:13, 3.45it/s] 32%|███▏ | 118573/371472 [9:26:16<20:28:25, 3.43it/s] 32%|███▏ | 118574/371472 [9:26:16<19:57:34, 3.52it/s] 32%|███▏ | 118575/371472 [9:26:16<20:18:59, 3.46it/s] 32%|███▏ | 118576/371472 [9:26:16<19:32:21, 3.60it/s] 32%|███▏ | 118577/371472 [9:26:17<22:05:40, 3.18it/s] 32%|███▏ | 118578/371472 [9:26:17<21:04:22, 3.33it/s] 32%|███▏ | 118579/371472 [9:26:17<21:19:16, 3.29it/s] 32%|███▏ | 118580/371472 [9:26:18<20:23:32, 3.44it/s] {'loss': 3.4141, 'learning_rate': 7.130351871404253e-07, 'epoch': 5.11} + 32%|███▏ | 118580/371472 [9:26:18<20:23:32, 3.44it/s] 32%|███▏ | 118581/371472 [9:26:18<19:49:35, 3.54it/s] 32%|███▏ | 118582/371472 [9:26:18<20:22:37, 3.45it/s] 32%|███▏ | 118583/371472 [9:26:19<20:19:22, 3.46it/s] 32%|███▏ | 118584/371472 [9:26:19<20:25:39, 3.44it/s] 32%|███▏ | 118585/371472 [9:26:19<19:41:03, 3.57it/s] 32%|███▏ | 118586/371472 [9:26:19<20:21:11, 3.45it/s] 32%|███▏ | 118587/371472 [9:26:20<19:58:47, 3.52it/s] 32%|███▏ | 118588/371472 [9:26:20<19:31:27, 3.60it/s] 32%|███▏ | 118589/371472 [9:26:20<19:28:55, 3.61it/s] 32%|███▏ | 118590/371472 [9:26:20<18:43:13, 3.75it/s] 32%|███▏ | 118591/371472 [9:26:21<20:06:33, 3.49it/s] 32%|███▏ | 118592/371472 [9:26:21<21:56:24, 3.20it/s] 32%|███▏ | 118593/371472 [9:26:21<21:04:10, 3.33it/s] 32%|███▏ | 118594/371472 [9:26:22<20:24:25, 3.44it/s] 32%|███▏ | 118595/371472 [9:26:22<19:51:56, 3.54it/s] 32%|███▏ | 118596/371472 [9:26:22<18:58:16, 3.70it/s] 32%|███▏ | 118597/371472 [9:26:23<20:51:18, 3.37it/s] 32%|███▏ | 118598/371472 [9:26:23<20:31:22, 3.42it/s] 32%|███▏ | 118599/371472 [9:26:23<21:03:45, 3.33it/s] 32%|███▏ | 118600/371472 [9:26:23<20:44:15, 3.39it/s] {'loss': 3.2897, 'learning_rate': 7.129867051649464e-07, 'epoch': 5.11} + 32%|███▏ | 118600/371472 [9:26:23<20:44:15, 3.39it/s] 32%|███▏ | 118601/371472 [9:26:24<20:15:31, 3.47it/s] 32%|███▏ | 118602/371472 [9:26:24<21:05:18, 3.33it/s] 32%|███▏ | 118603/371472 [9:26:24<19:53:18, 3.53it/s] 32%|███▏ | 118604/371472 [9:26:25<19:44:21, 3.56it/s] 32%|███▏ | 118605/371472 [9:26:25<19:09:35, 3.67it/s] 32%|███▏ | 118606/371472 [9:26:25<19:08:24, 3.67it/s] 32%|███▏ | 118607/371472 [9:26:25<20:14:17, 3.47it/s] 32%|███▏ | 118608/371472 [9:26:26<20:31:16, 3.42it/s] 32%|███▏ | 118609/371472 [9:26:26<20:08:01, 3.49it/s] 32%|███▏ | 118610/371472 [9:26:26<19:21:14, 3.63it/s] 32%|███▏ | 118611/371472 [9:26:27<18:40:15, 3.76it/s] 32%|███▏ | 118612/371472 [9:26:27<18:16:48, 3.84it/s] 32%|███▏ | 118613/371472 [9:26:27<18:32:46, 3.79it/s] 32%|███▏ | 118614/371472 [9:26:27<18:44:07, 3.75it/s] 32%|███▏ | 118615/371472 [9:26:28<18:45:56, 3.74it/s] 32%|███▏ | 118616/371472 [9:26:28<18:59:33, 3.70it/s] 32%|███▏ | 118617/371472 [9:26:28<19:43:07, 3.56it/s] 32%|███▏ | 118618/371472 [9:26:28<19:42:27, 3.56it/s] 32%|███▏ | 118619/371472 [9:26:29<20:20:13, 3.45it/s] 32%|███▏ | 118620/371472 [9:26:29<19:59:31, 3.51it/s] {'loss': 3.34, 'learning_rate': 7.129382231894675e-07, 'epoch': 5.11} + 32%|███▏ | 118620/371472 [9:26:29<19:59:31, 3.51it/s] 32%|███▏ | 118621/371472 [9:26:29<19:36:08, 3.58it/s] 32%|███▏ | 118622/371472 [9:26:30<18:47:17, 3.74it/s] 32%|███▏ | 118623/371472 [9:26:30<18:41:46, 3.76it/s] 32%|███▏ | 118624/371472 [9:26:30<18:31:36, 3.79it/s] 32%|███▏ | 118625/371472 [9:26:30<19:29:32, 3.60it/s] 32%|███▏ | 118626/371472 [9:26:31<19:28:03, 3.61it/s] 32%|███▏ | 118627/371472 [9:26:31<19:25:07, 3.62it/s] 32%|███▏ | 118628/371472 [9:26:31<19:24:55, 3.62it/s] 32%|███▏ | 118629/371472 [9:26:31<18:47:22, 3.74it/s] 32%|███▏ | 118630/371472 [9:26:32<19:20:00, 3.63it/s] 32%|███▏ | 118631/371472 [9:26:32<21:01:15, 3.34it/s] 32%|███▏ | 118632/371472 [9:26:32<22:00:09, 3.19it/s] 32%|███▏ | 118633/371472 [9:26:33<22:44:12, 3.09it/s] 32%|███▏ | 118634/371472 [9:26:33<22:07:13, 3.18it/s] 32%|███▏ | 118635/371472 [9:26:33<20:34:51, 3.41it/s] 32%|███▏ | 118636/371472 [9:26:34<19:31:09, 3.60it/s] 32%|███▏ | 118637/371472 [9:26:34<19:02:35, 3.69it/s] 32%|███▏ | 118638/371472 [9:26:34<21:19:49, 3.29it/s] 32%|███▏ | 118639/371472 [9:26:34<21:06:31, 3.33it/s] 32%|███▏ | 118640/371472 [9:26:35<20:28:22, 3.43it/s] {'loss': 3.3275, 'learning_rate': 7.128897412139886e-07, 'epoch': 5.11} + 32%|███▏ | 118640/371472 [9:26:35<20:28:22, 3.43it/s] 32%|███▏ | 118641/371472 [9:26:35<19:45:41, 3.55it/s] 32%|███▏ | 118642/371472 [9:26:35<20:19:23, 3.46it/s] 32%|███▏ | 118643/371472 [9:26:36<21:07:45, 3.32it/s] 32%|███▏ | 118644/371472 [9:26:36<20:58:19, 3.35it/s] 32%|███▏ | 118645/371472 [9:26:36<20:15:51, 3.47it/s] 32%|███▏ | 118646/371472 [9:26:37<21:20:17, 3.29it/s] 32%|███▏ | 118647/371472 [9:26:37<19:59:38, 3.51it/s] 32%|███▏ | 118648/371472 [9:26:37<19:36:53, 3.58it/s] 32%|███▏ | 118649/371472 [9:26:37<20:15:50, 3.47it/s] 32%|███▏ | 118650/371472 [9:26:38<20:42:23, 3.39it/s] 32%|███▏ | 118651/371472 [9:26:38<20:57:33, 3.35it/s] 32%|███▏ | 118652/371472 [9:26:38<24:38:19, 2.85it/s] 32%|███▏ | 118653/371472 [9:26:39<22:33:14, 3.11it/s] 32%|███▏ | 118654/371472 [9:26:39<21:46:04, 3.23it/s] 32%|███▏ | 118655/371472 [9:26:39<22:15:33, 3.15it/s] 32%|███▏ | 118656/371472 [9:26:40<20:46:05, 3.38it/s] 32%|███▏ | 118657/371472 [9:26:40<19:52:11, 3.53it/s] 32%|███▏ | 118658/371472 [9:26:40<20:04:35, 3.50it/s] 32%|███▏ | 118659/371472 [9:26:40<19:35:07, 3.59it/s] 32%|███▏ | 118660/371472 [9:26:41<19:30:26, 3.60it/s] {'loss': 3.3727, 'learning_rate': 7.128412592385097e-07, 'epoch': 5.11} + 32%|███▏ | 118660/371472 [9:26:41<19:30:26, 3.60it/s] 32%|███▏ | 118661/371472 [9:26:41<20:24:29, 3.44it/s] 32%|███▏ | 118662/371472 [9:26:41<20:24:13, 3.44it/s] 32%|███▏ | 118663/371472 [9:26:42<21:36:07, 3.25it/s] 32%|███▏ | 118664/371472 [9:26:42<22:36:54, 3.11it/s] 32%|███▏ | 118665/371472 [9:26:42<21:22:36, 3.29it/s] 32%|███▏ | 118666/371472 [9:26:43<22:35:02, 3.11it/s] 32%|███▏ | 118667/371472 [9:26:43<22:10:58, 3.17it/s] 32%|███▏ | 118668/371472 [9:26:43<21:06:32, 3.33it/s] 32%|███▏ | 118669/371472 [9:26:43<20:22:24, 3.45it/s] 32%|███▏ | 118670/371472 [9:26:44<20:33:54, 3.41it/s] 32%|███▏ | 118671/371472 [9:26:44<21:50:00, 3.22it/s] 32%|███▏ | 118672/371472 [9:26:44<21:57:00, 3.20it/s] 32%|███▏ | 118673/371472 [9:26:45<21:52:46, 3.21it/s] 32%|███▏ | 118674/371472 [9:26:45<20:45:35, 3.38it/s] 32%|███▏ | 118675/371472 [9:26:45<21:04:41, 3.33it/s] 32%|███▏ | 118676/371472 [9:26:46<21:09:43, 3.32it/s] 32%|███▏ | 118677/371472 [9:26:46<20:03:12, 3.50it/s] 32%|███▏ | 118678/371472 [9:26:46<20:10:17, 3.48it/s] 32%|███▏ | 118679/371472 [9:26:46<19:22:13, 3.63it/s] 32%|███▏ | 118680/371472 [9:26:47<18:54:48, 3.71it/s] {'loss': 3.0476, 'learning_rate': 7.127927772630309e-07, 'epoch': 5.11} + 32%|███▏ | 118680/371472 [9:26:47<18:54:48, 3.71it/s] 32%|███▏ | 118681/371472 [9:26:47<18:41:19, 3.76it/s] 32%|███▏ | 118682/371472 [9:26:47<19:46:48, 3.55it/s] 32%|███▏ | 118683/371472 [9:26:47<19:54:29, 3.53it/s] 32%|███▏ | 118684/371472 [9:26:48<19:01:34, 3.69it/s] 32%|███▏ | 118685/371472 [9:26:48<18:41:32, 3.76it/s] 32%|███▏ | 118686/371472 [9:26:48<20:11:33, 3.48it/s] 32%|███▏ | 118687/371472 [9:26:49<20:16:35, 3.46it/s] 32%|███▏ | 118688/371472 [9:26:49<20:54:20, 3.36it/s] 32%|███▏ | 118689/371472 [9:26:49<20:03:51, 3.50it/s] 32%|███▏ | 118690/371472 [9:26:49<19:16:09, 3.64it/s] 32%|███▏ | 118691/371472 [9:26:50<20:17:13, 3.46it/s] 32%|███▏ | 118692/371472 [9:26:50<21:54:17, 3.21it/s] 32%|███▏ | 118693/371472 [9:26:51<24:14:35, 2.90it/s] 32%|███▏ | 118694/371472 [9:26:51<22:38:24, 3.10it/s] 32%|███▏ | 118695/371472 [9:26:51<22:56:42, 3.06it/s] 32%|███▏ | 118696/371472 [9:26:51<21:48:35, 3.22it/s] 32%|███▏ | 118697/371472 [9:26:52<20:49:55, 3.37it/s] 32%|███▏ | 118698/371472 [9:26:52<20:02:34, 3.50it/s] 32%|███▏ | 118699/371472 [9:26:52<20:18:54, 3.46it/s] 32%|███▏ | 118700/371472 [9:26:53<19:33:12, 3.59it/s] {'loss': 3.1781, 'learning_rate': 7.12744295287552e-07, 'epoch': 5.11} + 32%|███▏ | 118700/371472 [9:26:53<19:33:12, 3.59it/s] 32%|███▏ | 118701/371472 [9:26:53<19:05:03, 3.68it/s] 32%|███▏ | 118702/371472 [9:26:53<19:11:56, 3.66it/s] 32%|███▏ | 118703/371472 [9:26:53<18:46:55, 3.74it/s] 32%|███▏ | 118704/371472 [9:26:54<18:30:31, 3.79it/s] 32%|███▏ | 118705/371472 [9:26:54<18:49:12, 3.73it/s] 32%|███▏ | 118706/371472 [9:26:54<19:38:03, 3.58it/s] 32%|███▏ | 118707/371472 [9:26:54<20:12:43, 3.47it/s] 32%|███▏ | 118708/371472 [9:26:55<19:44:19, 3.56it/s] 32%|███▏ | 118709/371472 [9:26:55<19:17:51, 3.64it/s] 32%|███▏ | 118710/371472 [9:26:55<18:32:09, 3.79it/s] 32%|███▏ | 118711/371472 [9:26:55<18:19:55, 3.83it/s] 32%|███▏ | 118712/371472 [9:26:56<18:22:03, 3.82it/s] 32%|███▏ | 118713/371472 [9:26:56<18:33:22, 3.78it/s] 32%|███▏ | 118714/371472 [9:26:56<19:21:07, 3.63it/s] 32%|███▏ | 118715/371472 [9:26:57<20:33:23, 3.42it/s] 32%|███▏ | 118716/371472 [9:26:57<20:37:32, 3.40it/s] 32%|███▏ | 118717/371472 [9:26:57<19:28:44, 3.60it/s] 32%|███▏ | 118718/371472 [9:26:57<19:19:20, 3.63it/s] 32%|███▏ | 118719/371472 [9:26:58<19:12:36, 3.65it/s] 32%|███▏ | 118720/371472 [9:26:58<18:45:09, 3.74it/s] {'loss': 3.3102, 'learning_rate': 7.126958133120729e-07, 'epoch': 5.11} + 32%|███▏ | 118720/371472 [9:26:58<18:45:09, 3.74it/s] 32%|███▏ | 118721/371472 [9:26:58<18:47:25, 3.74it/s] 32%|███▏ | 118722/371472 [9:26:59<20:10:55, 3.48it/s] 32%|███▏ | 118723/371472 [9:26:59<20:01:47, 3.51it/s] 32%|███▏ | 118724/371472 [9:26:59<21:07:06, 3.32it/s] 32%|███▏ | 118725/371472 [9:26:59<20:14:04, 3.47it/s] 32%|███▏ | 118726/371472 [9:27:00<19:42:50, 3.56it/s] 32%|███▏ | 118727/371472 [9:27:00<20:32:54, 3.42it/s] 32%|███▏ | 118728/371472 [9:27:00<20:24:37, 3.44it/s] 32%|███▏ | 118729/371472 [9:27:01<20:30:02, 3.42it/s] 32%|███▏ | 118730/371472 [9:27:01<20:15:39, 3.47it/s] 32%|███▏ | 118731/371472 [9:27:01<19:28:41, 3.60it/s] 32%|███▏ | 118732/371472 [9:27:01<19:15:04, 3.65it/s] 32%|███▏ | 118733/371472 [9:27:02<20:13:31, 3.47it/s] 32%|███▏ | 118734/371472 [9:27:02<21:49:40, 3.22it/s] 32%|███▏ | 118735/371472 [9:27:02<21:36:38, 3.25it/s] 32%|███▏ | 118736/371472 [9:27:03<21:42:41, 3.23it/s] 32%|███▏ | 118737/371472 [9:27:03<21:13:38, 3.31it/s] 32%|███▏ | 118738/371472 [9:27:03<20:58:08, 3.35it/s] 32%|███▏ | 118739/371472 [9:27:04<19:45:56, 3.55it/s] 32%|███▏ | 118740/371472 [9:27:04<19:41:35, 3.56it/s] {'loss': 3.4757, 'learning_rate': 7.126473313365941e-07, 'epoch': 5.11} + 32%|███▏ | 118740/371472 [9:27:04<19:41:35, 3.56it/s] 32%|███▏ | 118741/371472 [9:27:04<18:51:36, 3.72it/s] 32%|███▏ | 118742/371472 [9:27:04<18:21:51, 3.82it/s] 32%|███▏ | 118743/371472 [9:27:05<19:13:17, 3.65it/s] 32%|███▏ | 118744/371472 [9:27:05<19:08:45, 3.67it/s] 32%|███▏ | 118745/371472 [9:27:05<19:22:37, 3.62it/s] 32%|███▏ | 118746/371472 [9:27:05<21:05:21, 3.33it/s] 32%|███▏ | 118747/371472 [9:27:06<19:48:23, 3.54it/s] 32%|███▏ | 118748/371472 [9:27:06<21:12:59, 3.31it/s] 32%|███▏ | 118749/371472 [9:27:06<21:02:28, 3.34it/s] 32%|███▏ | 118750/371472 [9:27:07<20:39:55, 3.40it/s] 32%|███▏ | 118751/371472 [9:27:07<19:43:14, 3.56it/s] 32%|███▏ | 118752/371472 [9:27:07<19:38:28, 3.57it/s] 32%|███▏ | 118753/371472 [9:27:07<19:05:55, 3.68it/s] 32%|███▏ | 118754/371472 [9:27:08<20:01:50, 3.50it/s] 32%|███▏ | 118755/371472 [9:27:08<19:53:20, 3.53it/s] 32%|███▏ | 118756/371472 [9:27:08<20:21:09, 3.45it/s] 32%|███▏ | 118757/371472 [9:27:09<20:37:03, 3.40it/s] 32%|███▏ | 118758/371472 [9:27:09<19:53:27, 3.53it/s] 32%|███▏ | 118759/371472 [9:27:09<19:19:48, 3.63it/s] 32%|███▏ | 118760/371472 [9:27:09<19:36:31, 3.58it/s] {'loss': 3.3084, 'learning_rate': 7.125988493611153e-07, 'epoch': 5.12} + 32%|███▏ | 118760/371472 [9:27:09<19:36:31, 3.58it/s] 32%|███▏ | 118761/371472 [9:27:10<20:05:46, 3.49it/s] 32%|███▏ | 118762/371472 [9:27:10<19:33:19, 3.59it/s] 32%|███▏ | 118763/371472 [9:27:10<19:48:08, 3.54it/s] 32%|███▏ | 118764/371472 [9:27:11<19:11:06, 3.66it/s] 32%|███▏ | 118765/371472 [9:27:11<18:33:51, 3.78it/s] 32%|███▏ | 118766/371472 [9:27:11<21:31:56, 3.26it/s] 32%|███▏ | 118767/371472 [9:27:11<20:05:21, 3.49it/s] 32%|███▏ | 118768/371472 [9:27:12<20:37:00, 3.40it/s] 32%|███▏ | 118769/371472 [9:27:12<20:12:09, 3.47it/s] 32%|███▏ | 118770/371472 [9:27:12<20:08:46, 3.48it/s] 32%|███▏ | 118771/371472 [9:27:13<19:48:10, 3.54it/s] 32%|███▏ | 118772/371472 [9:27:13<19:50:15, 3.54it/s] 32%|███▏ | 118773/371472 [9:27:13<19:30:52, 3.60it/s] 32%|███▏ | 118774/371472 [9:27:13<18:58:34, 3.70it/s] 32%|███▏ | 118775/371472 [9:27:14<18:28:49, 3.80it/s] 32%|███▏ | 118776/371472 [9:27:14<19:08:04, 3.67it/s] 32%|███▏ | 118777/371472 [9:27:14<18:59:27, 3.70it/s] 32%|███▏ | 118778/371472 [9:27:14<18:28:46, 3.80it/s] 32%|███▏ | 118779/371472 [9:27:15<18:14:05, 3.85it/s] 32%|███▏ | 118780/371472 [9:27:15<18:15:30, 3.84it/s] {'loss': 3.3996, 'learning_rate': 7.125503673856364e-07, 'epoch': 5.12} + 32%|███▏ | 118780/371472 [9:27:15<18:15:30, 3.84it/s] 32%|███▏ | 118781/371472 [9:27:15<20:19:46, 3.45it/s] 32%|███▏ | 118782/371472 [9:27:16<20:02:28, 3.50it/s] 32%|███▏ | 118783/371472 [9:27:16<19:16:02, 3.64it/s] 32%|███▏ | 118784/371472 [9:27:16<20:20:26, 3.45it/s] 32%|███▏ | 118785/371472 [9:27:16<21:25:46, 3.28it/s] 32%|███▏ | 118786/371472 [9:27:17<20:40:01, 3.40it/s] 32%|███▏ | 118787/371472 [9:27:17<20:21:45, 3.45it/s] 32%|███▏ | 118788/371472 [9:27:17<19:35:39, 3.58it/s] 32%|███▏ | 118789/371472 [9:27:18<19:35:55, 3.58it/s] 32%|███▏ | 118790/371472 [9:27:18<19:25:29, 3.61it/s] 32%|███▏ | 118791/371472 [9:27:18<20:07:57, 3.49it/s] 32%|███▏ | 118792/371472 [9:27:18<19:38:03, 3.57it/s] 32%|███▏ | 118793/371472 [9:27:19<20:08:59, 3.48it/s] 32%|███▏ | 118794/371472 [9:27:19<19:57:33, 3.52it/s] 32%|███▏ | 118795/371472 [9:27:19<20:24:31, 3.44it/s] 32%|███▏ | 118796/371472 [9:27:20<22:16:44, 3.15it/s] 32%|███▏ | 118797/371472 [9:27:20<22:20:49, 3.14it/s] 32%|███▏ | 118798/371472 [9:27:20<21:39:05, 3.24it/s] 32%|███▏ | 118799/371472 [9:27:21<21:18:28, 3.29it/s] 32%|███▏ | 118800/371472 [9:27:21<20:21:12, 3.45it/s] {'loss': 3.447, 'learning_rate': 7.125018854101575e-07, 'epoch': 5.12} + 32%|███▏ | 118800/371472 [9:27:21<20:21:12, 3.45it/s] 32%|███▏ | 118801/371472 [9:27:21<20:45:55, 3.38it/s] 32%|███▏ | 118802/371472 [9:27:21<19:54:04, 3.53it/s] 32%|███▏ | 118803/371472 [9:27:22<19:37:35, 3.58it/s] 32%|███▏ | 118804/371472 [9:27:22<19:45:44, 3.55it/s] 32%|███▏ | 118805/371472 [9:27:22<20:12:58, 3.47it/s] 32%|███▏ | 118806/371472 [9:27:23<19:41:49, 3.56it/s] 32%|███▏ | 118807/371472 [9:27:23<19:04:49, 3.68it/s] 32%|███▏ | 118808/371472 [9:27:23<18:35:38, 3.77it/s] 32%|███▏ | 118809/371472 [9:27:23<18:27:17, 3.80it/s] 32%|███▏ | 118810/371472 [9:27:24<21:00:49, 3.34it/s] 32%|███▏ | 118811/371472 [9:27:24<20:26:55, 3.43it/s] 32%|███▏ | 118812/371472 [9:27:24<19:52:30, 3.53it/s] 32%|███▏ | 118813/371472 [9:27:25<20:27:46, 3.43it/s] 32%|███▏ | 118814/371472 [9:27:25<19:28:50, 3.60it/s] 32%|███▏ | 118815/371472 [9:27:25<18:47:05, 3.74it/s] 32%|███▏ | 118816/371472 [9:27:25<18:36:50, 3.77it/s] 32%|███▏ | 118817/371472 [9:27:26<18:58:43, 3.70it/s] 32%|███▏ | 118818/371472 [9:27:26<20:18:26, 3.46it/s] 32%|███▏ | 118819/371472 [9:27:26<21:02:36, 3.34it/s] 32%|███▏ | 118820/371472 [9:27:27<21:19:50, 3.29it/s] {'loss': 3.2432, 'learning_rate': 7.124534034346787e-07, 'epoch': 5.12} + 32%|███▏ | 118820/371472 [9:27:27<21:19:50, 3.29it/s] 32%|███▏ | 118821/371472 [9:27:27<21:47:30, 3.22it/s] 32%|███▏ | 118822/371472 [9:27:27<21:21:21, 3.29it/s] 32%|███▏ | 118823/371472 [9:27:27<20:52:26, 3.36it/s] 32%|███▏ | 118824/371472 [9:27:28<20:43:38, 3.39it/s] 32%|███▏ | 118825/371472 [9:27:28<20:15:13, 3.47it/s] 32%|███▏ | 118826/371472 [9:27:28<21:39:04, 3.24it/s] 32%|███▏ | 118827/371472 [9:27:29<21:39:01, 3.24it/s] 32%|███▏ | 118828/371472 [9:27:29<20:41:45, 3.39it/s] 32%|███▏ | 118829/371472 [9:27:29<20:46:16, 3.38it/s] 32%|███▏ | 118830/371472 [9:27:29<20:01:19, 3.51it/s] 32%|███▏ | 118831/371472 [9:27:30<19:34:33, 3.58it/s] 32%|███▏ | 118832/371472 [9:27:30<19:41:43, 3.56it/s] 32%|███▏ | 118833/371472 [9:27:30<18:55:15, 3.71it/s] 32%|███▏ | 118834/371472 [9:27:31<20:18:30, 3.46it/s] 32%|███▏ | 118835/371472 [9:27:31<21:11:31, 3.31it/s] 32%|███▏ | 118836/371472 [9:27:31<22:37:23, 3.10it/s] 32%|███▏ | 118837/371472 [9:27:32<21:59:33, 3.19it/s] 32%|███▏ | 118838/371472 [9:27:32<21:15:12, 3.30it/s] 32%|███▏ | 118839/371472 [9:27:32<20:46:48, 3.38it/s] 32%|███▏ | 118840/371472 [9:27:32<20:26:29, 3.43it/s] {'loss': 3.2017, 'learning_rate': 7.124049214591997e-07, 'epoch': 5.12} + 32%|███▏ | 118840/371472 [9:27:32<20:26:29, 3.43it/s] 32%|███▏ | 118841/371472 [9:27:33<21:16:57, 3.30it/s] 32%|███▏ | 118842/371472 [9:27:33<21:40:11, 3.24it/s] 32%|███▏ | 118843/371472 [9:27:33<22:05:28, 3.18it/s] 32%|███▏ | 118844/371472 [9:27:34<21:15:42, 3.30it/s] 32%|███▏ | 118845/371472 [9:27:34<20:15:21, 3.46it/s] 32%|███▏ | 118846/371472 [9:27:34<19:07:31, 3.67it/s] 32%|███▏ | 118847/371472 [9:27:34<18:56:16, 3.71it/s] 32%|███▏ | 118848/371472 [9:27:35<18:50:02, 3.73it/s] 32%|███▏ | 118849/371472 [9:27:35<18:44:19, 3.74it/s] 32%|███▏ | 118850/371472 [9:27:35<19:23:48, 3.62it/s] 32%|███▏ | 118851/371472 [9:27:36<19:01:45, 3.69it/s] 32%|███▏ | 118852/371472 [9:27:36<19:32:27, 3.59it/s] 32%|███▏ | 118853/371472 [9:27:36<19:07:50, 3.67it/s] 32%|███▏ | 118854/371472 [9:27:36<19:05:09, 3.68it/s] 32%|███▏ | 118855/371472 [9:27:37<18:59:05, 3.70it/s] 32%|███▏ | 118856/371472 [9:27:37<18:57:02, 3.70it/s] 32%|███▏ | 118857/371472 [9:27:37<19:48:32, 3.54it/s] 32%|███▏ | 118858/371472 [9:27:37<19:00:05, 3.69it/s] 32%|███▏ | 118859/371472 [9:27:38<18:28:42, 3.80it/s] 32%|███▏ | 118860/371472 [9:27:38<18:32:45, 3.78it/s] {'loss': 3.1725, 'learning_rate': 7.123564394837208e-07, 'epoch': 5.12} + 32%|███▏ | 118860/371472 [9:27:38<18:32:45, 3.78it/s] 32%|███▏ | 118861/371472 [9:27:38<18:56:46, 3.70it/s] 32%|███▏ | 118862/371472 [9:27:39<20:18:42, 3.45it/s] 32%|███▏ | 118863/371472 [9:27:39<20:37:53, 3.40it/s] 32%|███▏ | 118864/371472 [9:27:39<20:43:09, 3.39it/s] 32%|███▏ | 118865/371472 [9:27:39<20:04:40, 3.49it/s] 32%|███▏ | 118866/371472 [9:27:40<19:25:04, 3.61it/s] 32%|███▏ | 118867/371472 [9:27:40<20:10:55, 3.48it/s] 32%|███▏ | 118868/371472 [9:27:40<19:26:36, 3.61it/s] 32%|███▏ | 118869/371472 [9:27:41<19:51:09, 3.53it/s] 32%|███▏ | 118870/371472 [9:27:41<19:44:31, 3.55it/s] 32%|███▏ | 118871/371472 [9:27:41<19:54:02, 3.53it/s] 32%|███▏ | 118872/371472 [9:27:41<19:17:09, 3.64it/s] 32%|███▏ | 118873/371472 [9:27:42<19:59:47, 3.51it/s] 32%|███▏ | 118874/371472 [9:27:42<19:15:17, 3.64it/s] 32%|███▏ | 118875/371472 [9:27:42<19:18:38, 3.63it/s] 32%|███▏ | 118876/371472 [9:27:42<18:54:52, 3.71it/s] 32%|███▏ | 118877/371472 [9:27:43<18:52:34, 3.72it/s] 32%|███▏ | 118878/371472 [9:27:43<19:18:01, 3.64it/s] 32%|███▏ | 118879/371472 [9:27:43<19:22:58, 3.62it/s] 32%|███▏ | 118880/371472 [9:27:44<18:44:50, 3.74it/s] {'loss': 3.2998, 'learning_rate': 7.123079575082419e-07, 'epoch': 5.12} + 32%|███▏ | 118880/371472 [9:27:44<18:44:50, 3.74it/s] 32%|███▏ | 118881/371472 [9:27:44<20:04:20, 3.50it/s] 32%|███▏ | 118882/371472 [9:27:44<19:38:59, 3.57it/s] 32%|███▏ | 118883/371472 [9:27:44<19:30:11, 3.60it/s] 32%|███▏ | 118884/371472 [9:27:45<19:52:44, 3.53it/s] 32%|███▏ | 118885/371472 [9:27:45<19:40:00, 3.57it/s] 32%|███▏ | 118886/371472 [9:27:45<21:06:15, 3.32it/s] 32%|███▏ | 118887/371472 [9:27:46<21:52:23, 3.21it/s] 32%|███▏ | 118888/371472 [9:27:46<21:31:04, 3.26it/s] 32%|███▏ | 118889/371472 [9:27:46<20:19:46, 3.45it/s] 32%|███▏ | 118890/371472 [9:27:46<19:29:09, 3.60it/s] 32%|███▏ | 118891/371472 [9:27:47<19:42:09, 3.56it/s] 32%|███▏ | 118892/371472 [9:27:47<19:28:36, 3.60it/s] 32%|███▏ | 118893/371472 [9:27:47<19:05:08, 3.68it/s] 32%|███▏ | 118894/371472 [9:27:48<18:47:34, 3.73it/s] 32%|███▏ | 118895/371472 [9:27:48<18:59:30, 3.69it/s] 32%|███▏ | 118896/371472 [9:27:48<19:30:04, 3.60it/s] 32%|███▏ | 118897/371472 [9:27:48<20:35:04, 3.41it/s] 32%|███▏ | 118898/371472 [9:27:49<19:35:51, 3.58it/s] 32%|███▏ | 118899/371472 [9:27:49<20:04:38, 3.49it/s] 32%|███▏ | 118900/371472 [9:27:49<19:36:24, 3.58it/s] {'loss': 3.179, 'learning_rate': 7.12259475532763e-07, 'epoch': 5.12} + 32%|███▏ | 118900/371472 [9:27:49<19:36:24, 3.58it/s] 32%|███▏ | 118901/371472 [9:27:50<20:10:51, 3.48it/s] 32%|███▏ | 118902/371472 [9:27:50<20:11:33, 3.47it/s] 32%|███▏ | 118903/371472 [9:27:50<20:04:16, 3.50it/s] 32%|███▏ | 118904/371472 [9:27:50<18:57:55, 3.70it/s] 32%|███▏ | 118905/371472 [9:27:51<18:49:50, 3.73it/s] 32%|███▏ | 118906/371472 [9:27:51<20:00:46, 3.51it/s] 32%|███▏ | 118907/371472 [9:27:51<20:15:10, 3.46it/s] 32%|███▏ | 118908/371472 [9:27:52<19:55:12, 3.52it/s] 32%|███▏ | 118909/371472 [9:27:52<20:27:23, 3.43it/s] 32%|███▏ | 118910/371472 [9:27:52<20:04:53, 3.49it/s] 32%|███▏ | 118911/371472 [9:27:52<19:35:43, 3.58it/s] 32%|███▏ | 118912/371472 [9:27:53<19:25:15, 3.61it/s] 32%|███▏ | 118913/371472 [9:27:53<19:19:08, 3.63it/s] 32%|███▏ | 118914/371472 [9:27:53<18:49:17, 3.73it/s] 32%|███▏ | 118915/371472 [9:27:53<19:33:08, 3.59it/s] 32%|███▏ | 118916/371472 [9:27:54<19:41:02, 3.56it/s] 32%|███▏ | 118917/371472 [9:27:54<19:51:40, 3.53it/s] 32%|███▏ | 118918/371472 [9:27:54<19:37:25, 3.57it/s] 32%|███▏ | 118919/371472 [9:27:55<20:39:59, 3.39it/s] 32%|███▏ | 118920/371472 [9:27:55<20:02:26, 3.50it/s] {'loss': 3.1106, 'learning_rate': 7.122109935572842e-07, 'epoch': 5.12} + 32%|███▏ | 118920/371472 [9:27:55<20:02:26, 3.50it/s] 32%|███▏ | 118921/371472 [9:27:55<19:39:35, 3.57it/s] 32%|███▏ | 118922/371472 [9:27:55<19:23:25, 3.62it/s] 32%|███▏ | 118923/371472 [9:27:56<21:32:27, 3.26it/s] 32%|███▏ | 118924/371472 [9:27:56<20:27:30, 3.43it/s] 32%|███▏ | 118925/371472 [9:27:57<23:24:27, 3.00it/s] 32%|███▏ | 118926/371472 [9:27:57<22:23:07, 3.13it/s] 32%|███▏ | 118927/371472 [9:27:57<21:21:08, 3.29it/s] 32%|███▏ | 118928/371472 [9:27:57<22:12:02, 3.16it/s] 32%|███▏ | 118929/371472 [9:27:58<21:29:14, 3.26it/s] 32%|███▏ | 118930/371472 [9:27:58<20:57:44, 3.35it/s] 32%|███▏ | 118931/371472 [9:27:58<21:54:12, 3.20it/s] 32%|███▏ | 118932/371472 [9:27:59<21:19:22, 3.29it/s] 32%|███▏ | 118933/371472 [9:27:59<20:29:07, 3.42it/s] 32%|███▏ | 118934/371472 [9:27:59<19:42:42, 3.56it/s] 32%|███▏ | 118935/371472 [9:27:59<19:09:19, 3.66it/s] 32%|███▏ | 118936/371472 [9:28:00<18:57:26, 3.70it/s] 32%|███▏ | 118937/371472 [9:28:00<18:54:05, 3.71it/s] 32%|███▏ | 118938/371472 [9:28:00<19:41:14, 3.56it/s] 32%|███▏ | 118939/371472 [9:28:00<19:00:39, 3.69it/s] 32%|███▏ | 118940/371472 [9:28:01<18:37:51, 3.77it/s] {'loss': 3.1492, 'learning_rate': 7.121625115818053e-07, 'epoch': 5.12} + 32%|███▏ | 118940/371472 [9:28:01<18:37:51, 3.77it/s] 32%|███▏ | 118941/371472 [9:28:01<18:48:24, 3.73it/s] 32%|███▏ | 118942/371472 [9:28:01<18:25:08, 3.81it/s] 32%|███▏ | 118943/371472 [9:28:01<18:11:52, 3.85it/s] 32%|███▏ | 118944/371472 [9:28:02<18:18:05, 3.83it/s] 32%|███▏ | 118945/371472 [9:28:02<18:28:11, 3.80it/s] 32%|███▏ | 118946/371472 [9:28:02<19:11:39, 3.65it/s] 32%|███▏ | 118947/371472 [9:28:03<18:53:03, 3.71it/s] 32%|███▏ | 118948/371472 [9:28:03<19:07:32, 3.67it/s] 32%|███▏ | 118949/371472 [9:28:03<18:36:58, 3.77it/s] 32%|███▏ | 118950/371472 [9:28:03<19:09:49, 3.66it/s] 32%|███▏ | 118951/371472 [9:28:04<19:08:46, 3.66it/s] 32%|███▏ | 118952/371472 [9:28:04<19:45:14, 3.55it/s] 32%|███▏ | 118953/371472 [9:28:04<19:25:13, 3.61it/s] 32%|███▏ | 118954/371472 [9:28:05<20:11:07, 3.47it/s] 32%|███▏ | 118955/371472 [9:28:05<19:56:57, 3.52it/s] 32%|███▏ | 118956/371472 [9:28:05<19:13:15, 3.65it/s] 32%|███▏ | 118957/371472 [9:28:05<18:33:33, 3.78it/s] 32%|███▏ | 118958/371472 [9:28:06<19:51:48, 3.53it/s] 32%|███▏ | 118959/371472 [9:28:06<21:19:31, 3.29it/s] 32%|███▏ | 118960/371472 [9:28:06<20:45:07, 3.38it/s] {'loss': 3.2879, 'learning_rate': 7.121140296063264e-07, 'epoch': 5.12} + 32%|███▏ | 118960/371472 [9:28:06<20:45:07, 3.38it/s] 32%|███▏ | 118961/371472 [9:28:07<20:44:10, 3.38it/s] 32%|███▏ | 118962/371472 [9:28:07<21:25:16, 3.27it/s] 32%|███▏ | 118963/371472 [9:28:07<21:08:33, 3.32it/s] 32%|███▏ | 118964/371472 [9:28:07<20:57:02, 3.35it/s] 32%|███▏ | 118965/371472 [9:28:08<20:07:12, 3.49it/s] 32%|███▏ | 118966/371472 [9:28:08<19:50:15, 3.54it/s] 32%|███▏ | 118967/371472 [9:28:08<20:36:20, 3.40it/s] 32%|███▏ | 118968/371472 [9:28:09<19:56:10, 3.52it/s] 32%|███▏ | 118969/371472 [9:28:09<18:55:20, 3.71it/s] 32%|███▏ | 118970/371472 [9:28:09<18:57:24, 3.70it/s] 32%|███▏ | 118971/371472 [9:28:09<19:59:41, 3.51it/s] 32%|███▏ | 118972/371472 [9:28:10<19:52:58, 3.53it/s] 32%|███▏ | 118973/371472 [9:28:10<19:41:40, 3.56it/s] 32%|███▏ | 118974/371472 [9:28:10<20:55:15, 3.35it/s] 32%|███▏ | 118975/371472 [9:28:11<20:39:37, 3.39it/s] 32%|███▏ | 118976/371472 [9:28:11<20:05:50, 3.49it/s] 32%|███▏ | 118977/371472 [9:28:11<19:29:19, 3.60it/s] 32%|███▏ | 118978/371472 [9:28:11<19:04:12, 3.68it/s] 32%|███▏ | 118979/371472 [9:28:12<18:31:34, 3.79it/s] 32%|███▏ | 118980/371472 [9:28:12<19:24:11, 3.61it/s] {'loss': 3.3552, 'learning_rate': 7.120655476308474e-07, 'epoch': 5.12} + 32%|███▏ | 118980/371472 [9:28:12<19:24:11, 3.61it/s] 32%|███▏ | 118981/371472 [9:28:12<18:46:27, 3.74it/s] 32%|███▏ | 118982/371472 [9:28:12<18:21:51, 3.82it/s] 32%|███▏ | 118983/371472 [9:28:13<18:30:24, 3.79it/s] 32%|███▏ | 118984/371472 [9:28:13<20:06:30, 3.49it/s] 32%|███▏ | 118985/371472 [9:28:13<20:14:42, 3.46it/s] 32%|███▏ | 118986/371472 [9:28:14<19:41:56, 3.56it/s] 32%|███▏ | 118987/371472 [9:28:14<21:37:05, 3.24it/s] 32%|███▏ | 118988/371472 [9:28:14<20:15:12, 3.46it/s] 32%|███▏ | 118989/371472 [9:28:15<22:04:41, 3.18it/s] 32%|███▏ | 118990/371472 [9:28:15<21:47:05, 3.22it/s] 32%|███▏ | 118991/371472 [9:28:15<20:41:20, 3.39it/s] 32%|███▏ | 118992/371472 [9:28:15<21:32:06, 3.26it/s] 32%|███▏ | 118993/371472 [9:28:16<21:09:57, 3.31it/s] 32%|███▏ | 118994/371472 [9:28:16<21:17:21, 3.29it/s] 32%|███▏ | 118995/371472 [9:28:16<20:30:57, 3.42it/s] 32%|███▏ | 118996/371472 [9:28:17<20:42:41, 3.39it/s] 32%|███▏ | 118997/371472 [9:28:17<21:18:25, 3.29it/s] 32%|███▏ | 118998/371472 [9:28:17<22:15:46, 3.15it/s] 32%|███▏ | 118999/371472 [9:28:18<21:50:10, 3.21it/s] 32%|███▏ | 119000/371472 [9:28:18<21:06:51, 3.32it/s] {'loss': 3.1907, 'learning_rate': 7.120170656553685e-07, 'epoch': 5.13} + 32%|███▏ | 119000/371472 [9:28:18<21:06:51, 3.32it/s] 32%|███▏ | 119001/371472 [9:28:18<20:45:04, 3.38it/s] 32%|███▏ | 119002/371472 [9:28:18<20:55:33, 3.35it/s] 32%|███▏ | 119003/371472 [9:28:19<20:23:51, 3.44it/s] 32%|███▏ | 119004/371472 [9:28:19<20:02:16, 3.50it/s] 32%|███▏ | 119005/371472 [9:28:19<19:27:28, 3.60it/s] 32%|███▏ | 119006/371472 [9:28:20<19:22:07, 3.62it/s] 32%|███▏ | 119007/371472 [9:28:20<19:17:42, 3.63it/s] 32%|███▏ | 119008/371472 [9:28:20<19:25:35, 3.61it/s] 32%|███▏ | 119009/371472 [9:28:20<20:54:14, 3.35it/s] 32%|███▏ | 119010/371472 [9:28:21<20:54:40, 3.35it/s] 32%|███▏ | 119011/371472 [9:28:21<19:54:25, 3.52it/s] 32%|███▏ | 119012/371472 [9:28:21<19:30:15, 3.60it/s] 32%|███▏ | 119013/371472 [9:28:22<18:56:48, 3.70it/s] 32%|███▏ | 119014/371472 [9:28:22<20:20:33, 3.45it/s] 32%|███▏ | 119015/371472 [9:28:22<19:51:48, 3.53it/s] 32%|███▏ | 119016/371472 [9:28:22<19:40:36, 3.56it/s] 32%|███▏ | 119017/371472 [9:28:23<18:51:26, 3.72it/s] 32%|███▏ | 119018/371472 [9:28:23<19:25:28, 3.61it/s] 32%|███▏ | 119019/371472 [9:28:23<18:45:22, 3.74it/s] 32%|███▏ | 119020/371472 [9:28:23<18:30:32, 3.79it/s] {'loss': 3.2938, 'learning_rate': 7.119685836798896e-07, 'epoch': 5.13} + 32%|███▏ | 119020/371472 [9:28:23<18:30:32, 3.79it/s] 32%|███▏ | 119021/371472 [9:28:24<18:40:16, 3.76it/s] 32%|███▏ | 119022/371472 [9:28:24<19:41:57, 3.56it/s] 32%|███▏ | 119023/371472 [9:28:24<19:04:47, 3.68it/s] 32%|███▏ | 119024/371472 [9:28:25<19:08:17, 3.66it/s] 32%|███▏ | 119025/371472 [9:28:25<20:06:32, 3.49it/s] 32%|███▏ | 119026/371472 [9:28:25<19:46:58, 3.54it/s] 32%|███▏ | 119027/371472 [9:28:25<19:12:45, 3.65it/s] 32%|███▏ | 119028/371472 [9:28:26<19:19:06, 3.63it/s] 32%|███▏ | 119029/371472 [9:28:26<20:02:50, 3.50it/s] 32%|███▏ | 119030/371472 [9:28:26<20:13:06, 3.47it/s] 32%|███▏ | 119031/371472 [9:28:27<20:49:53, 3.37it/s] 32%|███▏ | 119032/371472 [9:28:27<21:00:22, 3.34it/s] 32%|███▏ | 119033/371472 [9:28:27<20:36:56, 3.40it/s] 32%|███▏ | 119034/371472 [9:28:28<21:24:18, 3.28it/s] 32%|███▏ | 119035/371472 [9:28:28<20:16:51, 3.46it/s] 32%|███▏ | 119036/371472 [9:28:28<20:07:50, 3.48it/s] 32%|███▏ | 119037/371472 [9:28:28<20:41:04, 3.39it/s] 32%|███▏ | 119038/371472 [9:28:29<20:25:02, 3.43it/s] 32%|███▏ | 119039/371472 [9:28:29<19:51:22, 3.53it/s] 32%|███▏ | 119040/371472 [9:28:29<19:40:54, 3.56it/s] {'loss': 3.3589, 'learning_rate': 7.119201017044107e-07, 'epoch': 5.13} + 32%|███▏ | 119040/371472 [9:28:29<19:40:54, 3.56it/s] 32%|███▏ | 119041/371472 [9:28:30<20:09:21, 3.48it/s] 32%|███▏ | 119042/371472 [9:28:30<21:04:51, 3.33it/s] 32%|███▏ | 119043/371472 [9:28:30<22:46:15, 3.08it/s] 32%|███▏ | 119044/371472 [9:28:31<22:13:22, 3.16it/s] 32%|███▏ | 119045/371472 [9:28:31<20:47:20, 3.37it/s] 32%|███▏ | 119046/371472 [9:28:31<21:01:04, 3.34it/s] 32%|███▏ | 119047/371472 [9:28:31<19:52:45, 3.53it/s] 32%|███▏ | 119048/371472 [9:28:32<19:25:40, 3.61it/s] 32%|███▏ | 119049/371472 [9:28:32<18:52:10, 3.72it/s] 32%|███▏ | 119050/371472 [9:28:32<18:31:57, 3.78it/s] 32%|███▏ | 119051/371472 [9:28:32<19:32:10, 3.59it/s] 32%|███▏ | 119052/371472 [9:28:33<20:01:36, 3.50it/s] 32%|███▏ | 119053/371472 [9:28:33<20:02:25, 3.50it/s] 32%|███▏ | 119054/371472 [9:28:33<20:05:54, 3.49it/s] 32%|███▏ | 119055/371472 [9:28:34<19:22:05, 3.62it/s] 32%|███▏ | 119056/371472 [9:28:34<19:54:08, 3.52it/s] 32%|███▏ | 119057/371472 [9:28:34<19:21:29, 3.62it/s] 32%|███▏ | 119058/371472 [9:28:34<18:44:57, 3.74it/s] 32%|███▏ | 119059/371472 [9:28:35<18:29:25, 3.79it/s] 32%|███▏ | 119060/371472 [9:28:35<18:11:03, 3.86it/s] {'loss': 3.1688, 'learning_rate': 7.118716197289319e-07, 'epoch': 5.13} + 32%|███▏ | 119060/371472 [9:28:35<18:11:03, 3.86it/s] 32%|███▏ | 119061/371472 [9:28:35<18:56:38, 3.70it/s] 32%|███▏ | 119062/371472 [9:28:36<21:23:20, 3.28it/s] 32%|███▏ | 119063/371472 [9:28:36<21:04:09, 3.33it/s] 32%|███▏ | 119064/371472 [9:28:36<20:21:26, 3.44it/s] 32%|███▏ | 119065/371472 [9:28:36<20:19:22, 3.45it/s] 32%|███▏ | 119066/371472 [9:28:37<19:36:39, 3.58it/s] 32%|███▏ | 119067/371472 [9:28:37<19:11:17, 3.65it/s] 32%|███▏ | 119068/371472 [9:28:37<19:24:16, 3.61it/s] 32%|███▏ | 119069/371472 [9:28:37<19:31:23, 3.59it/s] 32%|███▏ | 119070/371472 [9:28:38<19:31:21, 3.59it/s] 32%|███▏ | 119071/371472 [9:28:38<19:08:39, 3.66it/s] 32%|███▏ | 119072/371472 [9:28:38<21:42:38, 3.23it/s] 32%|███▏ | 119073/371472 [9:28:39<20:37:57, 3.40it/s] 32%|███▏ | 119074/371472 [9:28:39<21:38:42, 3.24it/s] 32%|███▏ | 119075/371472 [9:28:39<21:21:33, 3.28it/s] 32%|███▏ | 119076/371472 [9:28:40<20:47:25, 3.37it/s] 32%|███▏ | 119077/371472 [9:28:40<19:44:04, 3.55it/s] 32%|███▏ | 119078/371472 [9:28:40<19:32:55, 3.59it/s] 32%|███▏ | 119079/371472 [9:28:40<19:21:47, 3.62it/s] 32%|███▏ | 119080/371472 [9:28:41<20:52:23, 3.36it/s] {'loss': 3.2452, 'learning_rate': 7.11823137753453e-07, 'epoch': 5.13} + 32%|███▏ | 119080/371472 [9:28:41<20:52:23, 3.36it/s] 32%|███▏ | 119081/371472 [9:28:41<19:55:46, 3.52it/s] 32%|███▏ | 119082/371472 [9:28:41<18:55:27, 3.70it/s] 32%|███▏ | 119083/371472 [9:28:41<18:22:33, 3.82it/s] 32%|███▏ | 119084/371472 [9:28:42<19:33:52, 3.58it/s] 32%|███▏ | 119085/371472 [9:28:42<20:20:06, 3.45it/s] 32%|███▏ | 119086/371472 [9:28:42<19:57:18, 3.51it/s] 32%|███▏ | 119087/371472 [9:28:43<19:02:59, 3.68it/s] 32%|███▏ | 119088/371472 [9:28:43<18:26:16, 3.80it/s] 32%|███▏ | 119089/371472 [9:28:43<18:44:50, 3.74it/s] 32%|███▏ | 119090/371472 [9:28:43<17:56:32, 3.91it/s] 32%|███▏ | 119091/371472 [9:28:44<18:23:13, 3.81it/s] 32%|███▏ | 119092/371472 [9:28:44<18:23:22, 3.81it/s] 32%|███▏ | 119093/371472 [9:28:44<18:12:57, 3.85it/s] 32%|███▏ | 119094/371472 [9:28:44<18:24:27, 3.81it/s] 32%|███▏ | 119095/371472 [9:28:45<21:00:03, 3.34it/s] 32%|███▏ | 119096/371472 [9:28:45<21:28:13, 3.27it/s] 32%|███▏ | 119097/371472 [9:28:45<20:44:13, 3.38it/s] 32%|███▏ | 119098/371472 [9:28:46<21:03:06, 3.33it/s] 32%|███▏ | 119099/371472 [9:28:46<21:00:51, 3.34it/s] 32%|███▏ | 119100/371472 [9:28:46<19:48:06, 3.54it/s] {'loss': 3.3892, 'learning_rate': 7.11774655777974e-07, 'epoch': 5.13} + 32%|███▏ | 119100/371472 [9:28:46<19:48:06, 3.54it/s] 32%|███▏ | 119101/371472 [9:28:46<19:27:23, 3.60it/s] 32%|███▏ | 119102/371472 [9:28:47<18:57:27, 3.70it/s] 32%|███▏ | 119103/371472 [9:28:47<19:28:05, 3.60it/s] 32%|███▏ | 119104/371472 [9:28:47<19:00:52, 3.69it/s] 32%|███▏ | 119105/371472 [9:28:48<19:15:23, 3.64it/s] 32%|███▏ | 119106/371472 [9:28:48<19:16:27, 3.64it/s] 32%|███▏ | 119107/371472 [9:28:48<18:48:17, 3.73it/s] 32%|███▏ | 119108/371472 [9:28:48<18:59:15, 3.69it/s] 32%|███▏ | 119109/371472 [9:28:49<20:45:58, 3.38it/s] 32%|███▏ | 119110/371472 [9:28:49<19:46:11, 3.55it/s] 32%|███▏ | 119111/371472 [9:28:49<18:55:23, 3.70it/s] 32%|███▏ | 119112/371472 [9:28:49<18:41:12, 3.75it/s] 32%|███▏ | 119113/371472 [9:28:50<19:03:27, 3.68it/s] 32%|███▏ | 119114/371472 [9:28:50<18:35:19, 3.77it/s] 32%|███▏ | 119115/371472 [9:28:50<18:08:17, 3.86it/s] 32%|███▏ | 119116/371472 [9:28:51<19:48:18, 3.54it/s] 32%|███▏ | 119117/371472 [9:28:51<20:14:39, 3.46it/s] 32%|███▏ | 119118/371472 [9:28:51<20:32:23, 3.41it/s] 32%|███▏ | 119119/371472 [9:28:51<19:37:43, 3.57it/s] 32%|███▏ | 119120/371472 [9:28:52<20:08:12, 3.48it/s] {'loss': 3.425, 'learning_rate': 7.117261738024951e-07, 'epoch': 5.13} + 32%|███▏ | 119120/371472 [9:28:52<20:08:12, 3.48it/s] 32%|███▏ | 119121/371472 [9:28:52<19:28:03, 3.60it/s] 32%|███▏ | 119122/371472 [9:28:52<19:21:55, 3.62it/s] 32%|███▏ | 119123/371472 [9:28:53<20:04:03, 3.49it/s] 32%|███▏ | 119124/371472 [9:28:53<21:11:30, 3.31it/s] 32%|███▏ | 119125/371472 [9:28:53<20:49:58, 3.36it/s] 32%|███▏ | 119126/371472 [9:28:53<20:20:41, 3.45it/s] 32%|███▏ | 119127/371472 [9:28:54<20:31:06, 3.42it/s] 32%|███▏ | 119128/371472 [9:28:54<20:29:01, 3.42it/s] 32%|███▏ | 119129/371472 [9:28:54<19:40:21, 3.56it/s] 32%|███▏ | 119130/371472 [9:28:55<18:46:50, 3.73it/s] 32%|███▏ | 119131/371472 [9:28:55<18:41:44, 3.75it/s] 32%|███▏ | 119132/371472 [9:28:55<18:26:41, 3.80it/s] 32%|███▏ | 119133/371472 [9:28:55<19:29:07, 3.60it/s] 32%|███▏ | 119134/371472 [9:28:56<19:16:10, 3.64it/s] 32%|███▏ | 119135/371472 [9:28:56<18:52:20, 3.71it/s] 32%|███▏ | 119136/371472 [9:28:56<18:52:17, 3.71it/s] 32%|███▏ | 119137/371472 [9:28:56<18:57:39, 3.70it/s] 32%|███▏ | 119138/371472 [9:28:57<18:34:31, 3.77it/s] 32%|███▏ | 119139/371472 [9:28:57<18:23:17, 3.81it/s] 32%|███▏ | 119140/371472 [9:28:57<18:19:06, 3.83it/s] {'loss': 3.2473, 'learning_rate': 7.116776918270163e-07, 'epoch': 5.13} + 32%|███▏ | 119140/371472 [9:28:57<18:19:06, 3.83it/s] 32%|███▏ | 119141/371472 [9:28:57<18:16:45, 3.83it/s] 32%|███▏ | 119142/371472 [9:28:58<18:37:08, 3.76it/s] 32%|███▏ | 119143/371472 [9:28:58<18:21:35, 3.82it/s] 32%|███▏ | 119144/371472 [9:28:58<19:17:31, 3.63it/s] 32%|███▏ | 119145/371472 [9:28:59<19:16:08, 3.64it/s] 32%|███▏ | 119146/371472 [9:28:59<18:47:36, 3.73it/s] 32%|███▏ | 119147/371472 [9:28:59<19:20:21, 3.62it/s] 32%|███▏ | 119148/371472 [9:28:59<19:56:37, 3.51it/s] 32%|███▏ | 119149/371472 [9:29:00<21:00:12, 3.34it/s] 32%|███▏ | 119150/371472 [9:29:00<19:54:37, 3.52it/s] 32%|███▏ | 119151/371472 [9:29:00<19:47:59, 3.54it/s] 32%|███▏ | 119152/371472 [9:29:01<19:45:13, 3.55it/s] 32%|███▏ | 119153/371472 [9:29:01<20:12:04, 3.47it/s] 32%|███▏ | 119154/371472 [9:29:01<19:47:02, 3.54it/s] 32%|███▏ | 119155/371472 [9:29:01<20:03:52, 3.49it/s] 32%|███▏ | 119156/371472 [9:29:02<19:48:03, 3.54it/s] 32%|███▏ | 119157/371472 [9:29:02<19:12:16, 3.65it/s] 32%|███▏ | 119158/371472 [9:29:02<19:04:14, 3.68it/s] 32%|███▏ | 119159/371472 [9:29:03<19:00:01, 3.69it/s] 32%|███▏ | 119160/371472 [9:29:03<19:06:07, 3.67it/s] {'loss': 3.3399, 'learning_rate': 7.116292098515374e-07, 'epoch': 5.13} + 32%|███▏ | 119160/371472 [9:29:03<19:06:07, 3.67it/s] 32%|███▏ | 119161/371472 [9:29:03<21:06:59, 3.32it/s] 32%|███▏ | 119162/371472 [9:29:03<20:50:00, 3.36it/s] 32%|███▏ | 119163/371472 [9:29:04<19:54:26, 3.52it/s] 32%|███▏ | 119164/371472 [9:29:04<20:08:36, 3.48it/s] 32%|███▏ | 119165/371472 [9:29:04<20:18:03, 3.45it/s] 32%|███▏ | 119166/371472 [9:29:05<19:36:05, 3.58it/s] 32%|███▏ | 119167/371472 [9:29:05<19:29:45, 3.59it/s] 32%|███▏ | 119168/371472 [9:29:05<19:05:42, 3.67it/s] 32%|███▏ | 119169/371472 [9:29:05<18:37:28, 3.76it/s] 32%|███▏ | 119170/371472 [9:29:06<18:45:59, 3.73it/s] 32%|███▏ | 119171/371472 [9:29:06<18:27:44, 3.80it/s] 32%|███▏ | 119172/371472 [9:29:06<17:57:19, 3.90it/s] 32%|███▏ | 119173/371472 [9:29:06<18:03:56, 3.88it/s] 32%|███▏ | 119174/371472 [9:29:07<18:11:59, 3.85it/s] 32%|███▏ | 119175/371472 [9:29:07<18:41:51, 3.75it/s] 32%|███▏ | 119176/371472 [9:29:07<18:34:42, 3.77it/s] 32%|███▏ | 119177/371472 [9:29:07<18:49:09, 3.72it/s] 32%|███▏ | 119178/371472 [9:29:08<18:36:41, 3.77it/s] 32%|███▏ | 119179/371472 [9:29:08<18:20:32, 3.82it/s] 32%|███▏ | 119180/371472 [9:29:08<20:23:58, 3.44it/s] {'loss': 3.4017, 'learning_rate': 7.115807278760585e-07, 'epoch': 5.13} + 32%|███▏ | 119180/371472 [9:29:08<20:23:58, 3.44it/s] 32%|███▏ | 119181/371472 [9:29:09<19:25:01, 3.61it/s] 32%|███▏ | 119182/371472 [9:29:09<18:54:55, 3.70it/s] 32%|███▏ | 119183/371472 [9:29:09<18:32:38, 3.78it/s] 32%|███▏ | 119184/371472 [9:29:09<18:24:18, 3.81it/s] 32%|███▏ | 119185/371472 [9:29:10<18:06:26, 3.87it/s] 32%|███▏ | 119186/371472 [9:29:10<19:53:13, 3.52it/s] 32%|███▏ | 119187/371472 [9:29:10<19:32:57, 3.58it/s] 32%|███▏ | 119188/371472 [9:29:10<18:56:15, 3.70it/s] 32%|███▏ | 119189/371472 [9:29:11<19:35:15, 3.58it/s] 32%|███▏ | 119190/371472 [9:29:11<19:28:06, 3.60it/s] 32%|███▏ | 119191/371472 [9:29:11<20:47:44, 3.37it/s] 32%|███▏ | 119192/371472 [9:29:12<20:49:07, 3.37it/s] 32%|███▏ | 119193/371472 [9:29:12<20:33:59, 3.41it/s] 32%|███▏ | 119194/371472 [9:29:12<19:34:34, 3.58it/s] 32%|███▏ | 119195/371472 [9:29:12<19:24:17, 3.61it/s] 32%|███▏ | 119196/371472 [9:29:13<19:47:00, 3.54it/s] 32%|███▏ | 119197/371472 [9:29:13<19:53:10, 3.52it/s] 32%|███▏ | 119198/371472 [9:29:13<19:48:45, 3.54it/s] 32%|███▏ | 119199/371472 [9:29:14<18:55:05, 3.70it/s] 32%|███▏ | 119200/371472 [9:29:14<19:14:53, 3.64it/s] {'loss': 3.1981, 'learning_rate': 7.115322459005796e-07, 'epoch': 5.13} + 32%|███▏ | 119200/371472 [9:29:14<19:14:53, 3.64it/s] 32%|███▏ | 119201/371472 [9:29:14<20:26:41, 3.43it/s] 32%|███▏ | 119202/371472 [9:29:14<19:58:27, 3.51it/s] 32%|███▏ | 119203/371472 [9:29:15<19:19:29, 3.63it/s] 32%|███▏ | 119204/371472 [9:29:15<18:48:47, 3.72it/s] 32%|███▏ | 119205/371472 [9:29:15<19:19:34, 3.63it/s] 32%|███▏ | 119206/371472 [9:29:16<19:12:04, 3.65it/s] 32%|███▏ | 119207/371472 [9:29:16<18:18:56, 3.83it/s] 32%|███▏ | 119208/371472 [9:29:16<18:43:27, 3.74it/s] 32%|███▏ | 119209/371472 [9:29:16<18:31:25, 3.78it/s] 32%|███▏ | 119210/371472 [9:29:17<18:22:58, 3.81it/s] 32%|███▏ | 119211/371472 [9:29:17<18:36:49, 3.76it/s] 32%|███▏ | 119212/371472 [9:29:17<18:43:29, 3.74it/s] 32%|███▏ | 119213/371472 [9:29:17<18:41:42, 3.75it/s] 32%|███▏ | 119214/371472 [9:29:18<18:12:19, 3.85it/s] 32%|███▏ | 119215/371472 [9:29:18<18:58:41, 3.69it/s] 32%|███▏ | 119216/371472 [9:29:18<18:58:25, 3.69it/s] 32%|███▏ | 119217/371472 [9:29:18<18:58:20, 3.69it/s] 32%|███▏ | 119218/371472 [9:29:19<18:27:58, 3.79it/s] 32%|███▏ | 119219/371472 [9:29:19<18:35:25, 3.77it/s] 32%|███▏ | 119220/371472 [9:29:19<18:39:32, 3.76it/s] {'loss': 3.248, 'learning_rate': 7.114837639251008e-07, 'epoch': 5.14} + 32%|███▏ | 119220/371472 [9:29:19<18:39:32, 3.76it/s] 32%|███▏ | 119221/371472 [9:29:19<18:28:09, 3.79it/s] 32%|███▏ | 119222/371472 [9:29:20<18:34:17, 3.77it/s] 32%|███▏ | 119223/371472 [9:29:20<18:37:34, 3.76it/s] 32%|███▏ | 119224/371472 [9:29:20<18:59:02, 3.69it/s] 32%|███▏ | 119225/371472 [9:29:21<19:44:19, 3.55it/s] 32%|███▏ | 119226/371472 [9:29:21<19:39:42, 3.56it/s] 32%|███▏ | 119227/371472 [9:29:21<20:40:06, 3.39it/s] 32%|███▏ | 119228/371472 [9:29:21<20:00:55, 3.50it/s] 32%|███▏ | 119229/371472 [9:29:22<19:33:20, 3.58it/s] 32%|███▏ | 119230/371472 [9:29:22<19:21:15, 3.62it/s] 32%|███▏ | 119231/371472 [9:29:22<19:07:21, 3.66it/s] 32%|███▏ | 119232/371472 [9:29:23<18:35:39, 3.77it/s] 32%|███▏ | 119233/371472 [9:29:23<19:00:53, 3.68it/s] 32%|███▏ | 119234/371472 [9:29:23<18:45:48, 3.73it/s] 32%|███▏ | 119235/371472 [9:29:23<18:54:32, 3.71it/s] 32%|███▏ | 119236/371472 [9:29:24<18:20:45, 3.82it/s] 32%|███▏ | 119237/371472 [9:29:24<18:32:10, 3.78it/s] 32%|███▏ | 119238/371472 [9:29:24<19:04:53, 3.67it/s] 32%|███▏ | 119239/371472 [9:29:24<20:39:56, 3.39it/s] 32%|███▏ | 119240/371472 [9:29:25<20:25:41, 3.43it/s] {'loss': 3.2817, 'learning_rate': 7.114352819496217e-07, 'epoch': 5.14} + 32%|███▏ | 119240/371472 [9:29:25<20:25:41, 3.43it/s] 32%|███▏ | 119241/371472 [9:29:25<19:40:27, 3.56it/s] 32%|███▏ | 119242/371472 [9:29:25<19:35:37, 3.58it/s] 32%|███▏ | 119243/371472 [9:29:26<21:55:14, 3.20it/s] 32%|███▏ | 119244/371472 [9:29:26<20:43:08, 3.38it/s] 32%|███▏ | 119245/371472 [9:29:26<19:42:17, 3.56it/s] 32%|███▏ | 119246/371472 [9:29:26<19:24:23, 3.61it/s] 32%|███▏ | 119247/371472 [9:29:27<19:16:33, 3.63it/s] 32%|███▏ | 119248/371472 [9:29:27<19:09:29, 3.66it/s] 32%|███▏ | 119249/371472 [9:29:27<19:13:35, 3.64it/s] 32%|███▏ | 119250/371472 [9:29:28<19:19:50, 3.62it/s] 32%|███▏ | 119251/371472 [9:29:28<18:59:42, 3.69it/s] 32%|███▏ | 119252/371472 [9:29:28<18:41:25, 3.75it/s] 32%|███▏ | 119253/371472 [9:29:28<19:04:14, 3.67it/s] 32%|███▏ | 119254/371472 [9:29:29<21:44:27, 3.22it/s] 32%|███▏ | 119255/371472 [9:29:29<20:42:02, 3.38it/s] 32%|███▏ | 119256/371472 [9:29:29<20:12:58, 3.47it/s] 32%|███▏ | 119257/371472 [9:29:30<20:52:32, 3.36it/s] 32%|███▏ | 119258/371472 [9:29:30<20:04:08, 3.49it/s] 32%|███▏ | 119259/371472 [9:29:30<20:27:02, 3.43it/s] 32%|███▏ | 119260/371472 [9:29:30<19:46:25, 3.54it/s] {'loss': 3.3869, 'learning_rate': 7.113867999741429e-07, 'epoch': 5.14} + 32%|███▏ | 119260/371472 [9:29:30<19:46:25, 3.54it/s] 32%|███▏ | 119261/371472 [9:29:31<19:13:24, 3.64it/s] 32%|███▏ | 119262/371472 [9:29:31<18:27:37, 3.80it/s] 32%|███▏ | 119263/371472 [9:29:31<18:53:43, 3.71it/s] 32%|███▏ | 119264/371472 [9:29:31<18:46:52, 3.73it/s] 32%|███▏ | 119265/371472 [9:29:32<18:24:43, 3.80it/s] 32%|███▏ | 119266/371472 [9:29:32<18:22:28, 3.81it/s] 32%|███▏ | 119267/371472 [9:29:32<18:14:21, 3.84it/s] 32%|███▏ | 119268/371472 [9:29:33<19:10:08, 3.65it/s] 32%|███▏ | 119269/371472 [9:29:33<20:01:40, 3.50it/s] 32%|███▏ | 119270/371472 [9:29:33<20:06:19, 3.48it/s] 32%|███▏ | 119271/371472 [9:29:33<20:01:28, 3.50it/s] 32%|███▏ | 119272/371472 [9:29:34<19:14:52, 3.64it/s] 32%|███▏ | 119273/371472 [9:29:34<19:13:49, 3.64it/s] 32%|███▏ | 119274/371472 [9:29:34<18:27:41, 3.79it/s] 32%|███▏ | 119275/371472 [9:29:35<20:00:31, 3.50it/s] 32%|███▏ | 119276/371472 [9:29:35<19:40:20, 3.56it/s] 32%|███▏ | 119277/371472 [9:29:35<19:24:38, 3.61it/s] 32%|███▏ | 119278/371472 [9:29:35<19:04:35, 3.67it/s] 32%|███▏ | 119279/371472 [9:29:36<18:54:12, 3.71it/s] 32%|███▏ | 119280/371472 [9:29:36<19:44:18, 3.55it/s] {'loss': 3.408, 'learning_rate': 7.11338317998664e-07, 'epoch': 5.14} + 32%|███▏ | 119280/371472 [9:29:36<19:44:18, 3.55it/s] 32%|███▏ | 119281/371472 [9:29:36<19:56:15, 3.51it/s] 32%|███▏ | 119282/371472 [9:29:37<21:15:38, 3.29it/s] 32%|███▏ | 119283/371472 [9:29:37<20:01:59, 3.50it/s] 32%|███▏ | 119284/371472 [9:29:37<19:39:42, 3.56it/s] 32%|███▏ | 119285/371472 [9:29:37<19:48:25, 3.54it/s] 32%|███▏ | 119286/371472 [9:29:38<19:55:19, 3.52it/s] 32%|███▏ | 119287/371472 [9:29:38<21:06:45, 3.32it/s] 32%|███▏ | 119288/371472 [9:29:38<20:22:52, 3.44it/s] 32%|███▏ | 119289/371472 [9:29:39<20:55:43, 3.35it/s] 32%|███▏ | 119290/371472 [9:29:39<21:20:21, 3.28it/s] 32%|███▏ | 119291/371472 [9:29:39<20:55:19, 3.35it/s] 32%|███▏ | 119292/371472 [9:29:39<21:14:11, 3.30it/s] 32%|███▏ | 119293/371472 [9:29:40<21:20:27, 3.28it/s] 32%|███▏ | 119294/371472 [9:29:40<21:27:48, 3.26it/s] 32%|███▏ | 119295/371472 [9:29:40<20:26:08, 3.43it/s] 32%|███▏ | 119296/371472 [9:29:41<19:45:43, 3.54it/s] 32%|███▏ | 119297/371472 [9:29:41<19:28:50, 3.60it/s] 32%|███▏ | 119298/371472 [9:29:41<19:34:42, 3.58it/s] 32%|███▏ | 119299/371472 [9:29:41<19:51:57, 3.53it/s] 32%|███▏ | 119300/371472 [9:29:42<19:04:56, 3.67it/s] {'loss': 3.162, 'learning_rate': 7.112898360231851e-07, 'epoch': 5.14} + 32%|███▏ | 119300/371472 [9:29:42<19:04:56, 3.67it/s] 32%|███▏ | 119301/371472 [9:29:42<19:19:00, 3.63it/s] 32%|███▏ | 119302/371472 [9:29:42<19:20:56, 3.62it/s] 32%|███▏ | 119303/371472 [9:29:43<18:53:12, 3.71it/s] 32%|███▏ | 119304/371472 [9:29:43<19:21:50, 3.62it/s] 32%|███▏ | 119305/371472 [9:29:43<20:36:20, 3.40it/s] 32%|███▏ | 119306/371472 [9:29:43<19:58:05, 3.51it/s] 32%|███▏ | 119307/371472 [9:29:44<20:27:39, 3.42it/s] 32%|███▏ | 119308/371472 [9:29:44<19:55:45, 3.51it/s] 32%|███▏ | 119309/371472 [9:29:44<19:41:50, 3.56it/s] 32%|███▏ | 119310/371472 [9:29:45<19:05:01, 3.67it/s] 32%|███▏ | 119311/371472 [9:29:45<20:43:30, 3.38it/s] 32%|█��█▏ | 119312/371472 [9:29:45<20:59:58, 3.34it/s] 32%|███▏ | 119313/371472 [9:29:45<20:05:02, 3.49it/s] 32%|███▏ | 119314/371472 [9:29:46<20:34:15, 3.40it/s] 32%|███▏ | 119315/371472 [9:29:46<19:40:00, 3.56it/s] 32%|███▏ | 119316/371472 [9:29:46<18:51:05, 3.72it/s] 32%|███▏ | 119317/371472 [9:29:47<19:09:20, 3.66it/s] 32%|███▏ | 119318/371472 [9:29:47<20:50:52, 3.36it/s] 32%|███▏ | 119319/371472 [9:29:47<20:06:57, 3.48it/s] 32%|███▏ | 119320/371472 [9:29:47<19:46:59, 3.54it/s] {'loss': 3.2604, 'learning_rate': 7.112413540477063e-07, 'epoch': 5.14} + 32%|███▏ | 119320/371472 [9:29:47<19:46:59, 3.54it/s] 32%|███▏ | 119321/371472 [9:29:48<22:21:58, 3.13it/s] 32%|███▏ | 119322/371472 [9:29:48<21:20:08, 3.28it/s] 32%|███▏ | 119323/371472 [9:29:48<21:30:44, 3.26it/s] 32%|███▏ | 119324/371472 [9:29:49<21:04:44, 3.32it/s] 32%|███▏ | 119325/371472 [9:29:49<20:43:30, 3.38it/s] 32%|███▏ | 119326/371472 [9:29:49<20:51:20, 3.36it/s] 32%|███▏ | 119327/371472 [9:29:50<20:13:37, 3.46it/s] 32%|███▏ | 119328/371472 [9:29:50<19:01:53, 3.68it/s] 32%|███▏ | 119329/371472 [9:29:50<18:47:39, 3.73it/s] 32%|███▏ | 119330/371472 [9:29:50<19:23:53, 3.61it/s] 32%|███▏ | 119331/371472 [9:29:51<18:51:27, 3.71it/s] 32%|███▏ | 119332/371472 [9:29:51<18:45:50, 3.73it/s] 32%|███▏ | 119333/371472 [9:29:51<19:04:18, 3.67it/s] 32%|███▏ | 119334/371472 [9:29:51<19:22:25, 3.62it/s] 32%|███▏ | 119335/371472 [9:29:52<22:39:26, 3.09it/s] 32%|███▏ | 119336/371472 [9:29:52<21:45:18, 3.22it/s] 32%|███▏ | 119337/371472 [9:29:52<20:23:49, 3.43it/s] 32%|███▏ | 119338/371472 [9:29:53<19:58:14, 3.51it/s] 32%|███▏ | 119339/371472 [9:29:53<21:11:22, 3.31it/s] 32%|███▏ | 119340/371472 [9:29:53<21:10:31, 3.31it/s] {'loss': 3.176, 'learning_rate': 7.111928720722274e-07, 'epoch': 5.14} + 32%|███▏ | 119340/371472 [9:29:53<21:10:31, 3.31it/s] 32%|███▏ | 119341/371472 [9:29:54<20:50:39, 3.36it/s] 32%|███▏ | 119342/371472 [9:29:54<20:24:21, 3.43it/s] 32%|███▏ | 119343/371472 [9:29:54<19:36:22, 3.57it/s] 32%|███▏ | 119344/371472 [9:29:54<19:25:21, 3.61it/s] 32%|███▏ | 119345/371472 [9:29:55<19:43:27, 3.55it/s] 32%|███▏ | 119346/371472 [9:29:55<19:29:52, 3.59it/s] 32%|███▏ | 119347/371472 [9:29:55<18:49:43, 3.72it/s] 32%|███▏ | 119348/371472 [9:29:55<18:47:14, 3.73it/s] 32%|███▏ | 119349/371472 [9:29:56<18:59:50, 3.69it/s] 32%|███▏ | 119350/371472 [9:29:56<18:14:41, 3.84it/s] 32%|███▏ | 119351/371472 [9:29:56<18:15:59, 3.83it/s] 32%|███▏ | 119352/371472 [9:29:57<18:33:01, 3.78it/s] 32%|███▏ | 119353/371472 [9:29:57<18:38:30, 3.76it/s] 32%|███▏ | 119354/371472 [9:29:57<18:09:29, 3.86it/s] 32%|███▏ | 119355/371472 [9:29:57<18:01:50, 3.88it/s] 32%|███▏ | 119356/371472 [9:29:58<20:28:04, 3.42it/s] 32%|███▏ | 119357/371472 [9:29:58<20:11:45, 3.47it/s] 32%|███▏ | 119358/371472 [9:29:58<19:53:59, 3.52it/s] 32%|███▏ | 119359/371472 [9:29:59<20:53:33, 3.35it/s] 32%|███▏ | 119360/371472 [9:29:59<20:29:19, 3.42it/s] {'loss': 3.3473, 'learning_rate': 7.111443900967484e-07, 'epoch': 5.14} + 32%|███▏ | 119360/371472 [9:29:59<20:29:19, 3.42it/s] 32%|███▏ | 119361/371472 [9:29:59<20:42:50, 3.38it/s] 32%|███▏ | 119362/371472 [9:29:59<20:07:33, 3.48it/s] 32%|███▏ | 119363/371472 [9:30:00<21:30:21, 3.26it/s] 32%|███▏ | 119364/371472 [9:30:00<21:05:30, 3.32it/s] 32%|███▏ | 119365/371472 [9:30:00<22:20:41, 3.13it/s] 32%|███▏ | 119366/371472 [9:30:01<21:35:07, 3.24it/s] 32%|███▏ | 119367/371472 [9:30:01<20:56:00, 3.35it/s] 32%|███▏ | 119368/371472 [9:30:01<21:14:55, 3.30it/s] 32%|███▏ | 119369/371472 [9:30:02<20:03:44, 3.49it/s] 32%|███▏ | 119370/371472 [9:30:02<19:13:03, 3.64it/s] 32%|███▏ | 119371/371472 [9:30:02<19:18:03, 3.63it/s] 32%|███▏ | 119372/371472 [9:30:02<19:36:29, 3.57it/s] 32%|███▏ | 119373/371472 [9:30:03<19:59:06, 3.50it/s] 32%|███▏ | 119374/371472 [9:30:03<22:02:57, 3.18it/s] 32%|███▏ | 119375/371472 [9:30:03<21:06:29, 3.32it/s] 32%|███▏ | 119376/371472 [9:30:04<20:50:26, 3.36it/s] 32%|███▏ | 119377/371472 [9:30:04<20:22:07, 3.44it/s] 32%|███▏ | 119378/371472 [9:30:04<20:08:38, 3.48it/s] 32%|███▏ | 119379/371472 [9:30:04<21:15:01, 3.30it/s] 32%|███▏ | 119380/371472 [9:30:05<20:56:33, 3.34it/s] {'loss': 3.199, 'learning_rate': 7.110959081212695e-07, 'epoch': 5.14} + 32%|███▏ | 119380/371472 [9:30:05<20:56:33, 3.34it/s] 32%|███▏ | 119381/371472 [9:30:05<20:16:59, 3.45it/s] 32%|███▏ | 119382/371472 [9:30:05<20:14:15, 3.46it/s] 32%|███▏ | 119383/371472 [9:30:06<19:24:15, 3.61it/s] 32%|███▏ | 119384/371472 [9:30:06<18:55:02, 3.70it/s] 32%|███▏ | 119385/371472 [9:30:06<18:43:54, 3.74it/s] 32%|███▏ | 119386/371472 [9:30:06<18:21:00, 3.82it/s] 32%|███▏ | 119387/371472 [9:30:07<18:48:04, 3.72it/s] 32%|███▏ | 119388/371472 [9:30:07<17:59:54, 3.89it/s] 32%|███▏ | 119389/371472 [9:30:07<18:04:42, 3.87it/s] 32%|███▏ | 119390/371472 [9:30:07<17:44:30, 3.95it/s] 32%|███▏ | 119391/371472 [9:30:08<18:23:09, 3.81it/s] 32%|███▏ | 119392/371472 [9:30:08<17:43:49, 3.95it/s] 32%|███▏ | 119393/371472 [9:30:08<17:26:32, 4.01it/s] 32%|███▏ | 119394/371472 [9:30:08<18:31:21, 3.78it/s] 32%|███▏ | 119395/371472 [9:30:09<18:44:16, 3.74it/s] 32%|███▏ | 119396/371472 [9:30:09<19:27:18, 3.60it/s] 32%|███▏ | 119397/371472 [9:30:09<19:33:03, 3.58it/s] 32%|███▏ | 119398/371472 [9:30:10<19:04:44, 3.67it/s] 32%|███▏ | 119399/371472 [9:30:10<18:50:09, 3.72it/s] 32%|███▏ | 119400/371472 [9:30:10<19:31:52, 3.58it/s] {'loss': 3.3073, 'learning_rate': 7.110474261457907e-07, 'epoch': 5.14} + 32%|███▏ | 119400/371472 [9:30:10<19:31:52, 3.58it/s] 32%|███▏ | 119401/371472 [9:30:10<19:49:07, 3.53it/s] 32%|███▏ | 119402/371472 [9:30:11<19:16:52, 3.63it/s] 32%|███▏ | 119403/371472 [9:30:11<19:41:46, 3.55it/s] 32%|███▏ | 119404/371472 [9:30:11<19:29:32, 3.59it/s] 32%|███▏ | 119405/371472 [9:30:12<21:59:27, 3.18it/s] 32%|███▏ | 119406/371472 [9:30:12<20:49:12, 3.36it/s] 32%|███▏ | 119407/371472 [9:30:12<20:09:48, 3.47it/s] 32%|███▏ | 119408/371472 [9:30:12<19:07:39, 3.66it/s] 32%|███▏ | 119409/371472 [9:30:13<20:13:01, 3.46it/s] 32%|███▏ | 119410/371472 [9:30:13<19:38:03, 3.57it/s] 32%|███▏ | 119411/371472 [9:30:13<19:29:25, 3.59it/s] 32%|███▏ | 119412/371472 [9:30:13<19:08:53, 3.66it/s] 32%|███▏ | 119413/371472 [9:30:14<18:45:00, 3.73it/s] 32%|███▏ | 119414/371472 [9:30:14<18:33:36, 3.77it/s] 32%|███▏ | 119415/371472 [9:30:14<17:47:16, 3.94it/s] 32%|███▏ | 119416/371472 [9:30:14<17:48:03, 3.93it/s] 32%|███▏ | 119417/371472 [9:30:15<19:35:05, 3.57it/s] 32%|███▏ | 119418/371472 [9:30:15<19:14:55, 3.64it/s] 32%|███▏ | 119419/371472 [9:30:15<19:03:12, 3.67it/s] 32%|███▏ | 119420/371472 [9:30:16<18:45:12, 3.73it/s] {'loss': 3.3468, 'learning_rate': 7.109989441703118e-07, 'epoch': 5.14} + 32%|███▏ | 119420/371472 [9:30:16<18:45:12, 3.73it/s] 32%|███▏ | 119421/371472 [9:30:16<19:54:05, 3.52it/s] 32%|███▏ | 119422/371472 [9:30:16<21:16:42, 3.29it/s] 32%|███▏ | 119423/371472 [9:30:16<19:49:27, 3.53it/s] 32%|███▏ | 119424/371472 [9:30:17<19:38:18, 3.57it/s] 32%|███▏ | 119425/371472 [9:30:17<18:43:22, 3.74it/s] 32%|███▏ | 119426/371472 [9:30:17<20:41:23, 3.38it/s] 32%|███▏ | 119427/371472 [9:30:18<21:37:29, 3.24it/s] 32%|███▏ | 119428/371472 [9:30:18<22:45:21, 3.08it/s] 32%|███▏ | 119429/371472 [9:30:18<22:09:20, 3.16it/s] 32%|███▏ | 119430/371472 [9:30:19<22:39:08, 3.09it/s] 32%|███▏ | 119431/371472 [9:30:19<21:00:48, 3.33it/s] 32%|███▏ | 119432/371472 [9:30:19<20:01:36, 3.50it/s] 32%|███▏ | 119433/371472 [9:30:19<20:00:36, 3.50it/s] 32%|███▏ | 119434/371472 [9:30:20<20:06:15, 3.48it/s] 32%|███▏ | 119435/371472 [9:30:20<19:49:56, 3.53it/s] 32%|███▏ | 119436/371472 [9:30:20<19:27:36, 3.60it/s] 32%|███▏ | 119437/371472 [9:30:21<19:31:14, 3.59it/s] 32%|███▏ | 119438/371472 [9:30:21<19:08:48, 3.66it/s] 32%|███▏ | 119439/371472 [9:30:21<18:50:20, 3.72it/s] 32%|███▏ | 119440/371472 [9:30:21<19:08:28, 3.66it/s] {'loss': 3.2473, 'learning_rate': 7.109504621948329e-07, 'epoch': 5.14} + 32%|███▏ | 119440/371472 [9:30:21<19:08:28, 3.66it/s] 32%|███▏ | 119441/371472 [9:30:22<19:19:20, 3.62it/s] 32%|███▏ | 119442/371472 [9:30:22<19:18:39, 3.63it/s] 32%|███▏ | 119443/371472 [9:30:22<18:49:39, 3.72it/s] 32%|███▏ | 119444/371472 [9:30:22<18:56:07, 3.70it/s] 32%|███▏ | 119445/371472 [9:30:23<18:25:20, 3.80it/s] 32%|███▏ | 119446/371472 [9:30:23<18:50:46, 3.71it/s] 32%|███▏ | 119447/371472 [9:30:23<18:11:44, 3.85it/s] 32%|███▏ | 119448/371472 [9:30:24<18:44:57, 3.73it/s] 32%|███▏ | 119449/371472 [9:30:24<18:02:13, 3.88it/s] 32%|███▏ | 119450/371472 [9:30:24<17:38:58, 3.97it/s] 32%|███▏ | 119451/371472 [9:30:24<17:56:52, 3.90it/s] 32%|███▏ | 119452/371472 [9:30:25<18:45:21, 3.73it/s] 32%|███▏ | 119453/371472 [9:30:25<19:42:23, 3.55it/s] 32%|███▏ | 119454/371472 [9:30:25<19:04:13, 3.67it/s] 32%|███▏ | 119455/371472 [9:30:25<18:39:26, 3.75it/s] 32%|███▏ | 119456/371472 [9:30:26<19:35:16, 3.57it/s] 32%|███▏ | 119457/371472 [9:30:26<19:25:47, 3.60it/s] 32%|███▏ | 119458/371472 [9:30:26<19:38:28, 3.56it/s] 32%|███▏ | 119459/371472 [9:30:27<19:59:55, 3.50it/s] 32%|███▏ | 119460/371472 [9:30:27<20:03:12, 3.49it/s] {'loss': 3.2101, 'learning_rate': 7.10901980219354e-07, 'epoch': 5.15} + 32%|███▏ | 119460/371472 [9:30:27<20:03:12, 3.49it/s] 32%|███▏ | 119461/371472 [9:30:27<19:24:46, 3.61it/s] 32%|███▏ | 119462/371472 [9:30:27<19:09:04, 3.66it/s] 32%|███▏ | 119463/371472 [9:30:28<19:00:37, 3.68it/s] 32%|███▏ | 119464/371472 [9:30:28<19:05:52, 3.67it/s] 32%|███▏ | 119465/371472 [9:30:28<18:40:47, 3.75it/s] 32%|███▏ | 119466/371472 [9:30:28<18:36:59, 3.76it/s] 32%|███▏ | 119467/371472 [9:30:29<19:21:41, 3.62it/s] 32%|███▏ | 119468/371472 [9:30:29<20:37:19, 3.39it/s] 32%|███▏ | 119469/371472 [9:30:29<20:40:03, 3.39it/s] 32%|███▏ | 119470/371472 [9:30:30<20:54:19, 3.35it/s] 32%|███▏ | 119471/371472 [9:30:30<20:28:52, 3.42it/s] 32%|███▏ | 119472/371472 [9:30:30<20:40:10, 3.39it/s] 32%|███▏ | 119473/371472 [9:30:31<20:21:20, 3.44it/s] 32%|███▏ | 119474/371472 [9:30:31<20:56:11, 3.34it/s] 32%|███▏ | 119475/371472 [9:30:31<20:07:26, 3.48it/s] 32%|███▏ | 119476/371472 [9:30:31<19:46:23, 3.54it/s] 32%|███▏ | 119477/371472 [9:30:32<19:02:26, 3.68it/s] 32%|███▏ | 119478/371472 [9:30:32<18:57:18, 3.69it/s] 32%|███▏ | 119479/371472 [9:30:32<18:28:31, 3.79it/s] 32%|███▏ | 119480/371472 [9:30:32<18:55:03, 3.70it/s] {'loss': 3.329, 'learning_rate': 7.108534982438751e-07, 'epoch': 5.15} + 32%|███▏ | 119480/371472 [9:30:32<18:55:03, 3.70it/s] 32%|███▏ | 119481/371472 [9:30:33<18:13:06, 3.84it/s] 32%|███▏ | 119482/371472 [9:30:33<17:49:14, 3.93it/s] 32%|███▏ | 119483/371472 [9:30:33<17:52:19, 3.92it/s] 32%|███▏ | 119484/371472 [9:30:33<17:49:38, 3.93it/s] 32%|███▏ | 119485/371472 [9:30:34<18:45:07, 3.73it/s] 32%|███▏ | 119486/371472 [9:30:34<18:46:22, 3.73it/s] 32%|███▏ | 119487/371472 [9:30:34<18:18:52, 3.82it/s] 32%|███▏ | 119488/371472 [9:30:35<18:49:44, 3.72it/s] 32%|███▏ | 119489/371472 [9:30:35<18:23:36, 3.81it/s] 32%|███▏ | 119490/371472 [9:30:35<18:24:06, 3.80it/s] 32%|███▏ | 119491/371472 [9:30:35<18:32:02, 3.78it/s] 32%|███▏ | 119492/371472 [9:30:36<20:32:43, 3.41it/s] 32%|███▏ | 119493/371472 [9:30:36<21:51:25, 3.20it/s] 32%|███▏ | 119494/371472 [9:30:36<21:17:21, 3.29it/s] 32%|███▏ | 119495/371472 [9:30:37<22:07:27, 3.16it/s] 32%|███▏ | 119496/371472 [9:30:37<20:47:07, 3.37it/s] 32%|███▏ | 119497/371472 [9:30:37<21:05:53, 3.32it/s] 32%|███▏ | 119498/371472 [9:30:37<20:36:11, 3.40it/s] 32%|███▏ | 119499/371472 [9:30:38<19:55:42, 3.51it/s] 32%|███▏ | 119500/371472 [9:30:38<19:34:08, 3.58it/s] {'loss': 3.2842, 'learning_rate': 7.108050162683962e-07, 'epoch': 5.15} + 32%|███▏ | 119500/371472 [9:30:38<19:34:08, 3.58it/s] 32%|███▏ | 119501/371472 [9:30:38<21:55:06, 3.19it/s] 32%|███▏ | 119502/371472 [9:30:39<20:45:03, 3.37it/s] 32%|███▏ | 119503/371472 [9:30:39<19:42:26, 3.55it/s] 32%|███▏ | 119504/371472 [9:30:39<18:56:48, 3.69it/s] 32%|███▏ | 119505/371472 [9:30:39<18:37:15, 3.76it/s] 32%|███▏ | 119506/371472 [9:30:40<18:25:20, 3.80it/s] 32%|███▏ | 119507/371472 [9:30:40<18:32:20, 3.78it/s] 32%|███▏ | 119508/371472 [9:30:40<18:37:11, 3.76it/s] 32%|███▏ | 119509/371472 [9:30:41<19:33:41, 3.58it/s] 32%|███▏ | 119510/371472 [9:30:41<18:39:59, 3.75it/s] 32%|███▏ | 119511/371472 [9:30:41<18:27:37, 3.79it/s] 32%|███▏ | 119512/371472 [9:30:41<19:05:56, 3.66it/s] 32%|███▏ | 119513/371472 [9:30:42<19:33:35, 3.58it/s] 32%|███▏ | 119514/371472 [9:30:42<20:21:50, 3.44it/s] 32%|███▏ | 119515/371472 [9:30:42<19:22:59, 3.61it/s] 32%|███▏ | 119516/371472 [9:30:42<19:50:54, 3.53it/s] 32%|███▏ | 119517/371472 [9:30:43<19:10:39, 3.65it/s] 32%|███▏ | 119518/371472 [9:30:43<19:10:43, 3.65it/s] 32%|███▏ | 119519/371472 [9:30:43<18:46:48, 3.73it/s] 32%|███▏ | 119520/371472 [9:30:44<20:52:33, 3.35it/s] {'loss': 3.225, 'learning_rate': 7.107565342929172e-07, 'epoch': 5.15} + 32%|███▏ | 119520/371472 [9:30:44<20:52:33, 3.35it/s] 32%|███▏ | 119521/371472 [9:30:44<20:42:07, 3.38it/s] 32%|███▏ | 119522/371472 [9:30:44<20:01:48, 3.49it/s] 32%|███▏ | 119523/371472 [9:30:44<19:27:20, 3.60it/s] 32%|███▏ | 119524/371472 [9:30:45<21:43:57, 3.22it/s] 32%|███▏ | 119525/371472 [9:30:45<21:20:55, 3.28it/s] 32%|███▏ | 119526/371472 [9:30:45<20:29:55, 3.41it/s] 32%|███▏ | 119527/371472 [9:30:46<19:46:06, 3.54it/s] 32%|███▏ | 119528/371472 [9:30:46<20:36:46, 3.40it/s] 32%|███▏ | 119529/371472 [9:30:46<20:10:07, 3.47it/s] 32%|███▏ | 119530/371472 [9:30:47<20:24:45, 3.43it/s] 32%|███▏ | 119531/371472 [9:30:47<19:42:32, 3.55it/s] 32%|███▏ | 119532/371472 [9:30:47<19:21:09, 3.62it/s] 32%|███▏ | 119533/371472 [9:30:47<19:13:46, 3.64it/s] 32%|███▏ | 119534/371472 [9:30:48<18:27:02, 3.79it/s] 32%|███▏ | 119535/371472 [9:30:48<18:09:57, 3.85it/s] 32%|███▏ | 119536/371472 [9:30:48<17:55:24, 3.90it/s] 32%|███▏ | 119537/371472 [9:30:48<18:14:04, 3.84it/s] 32%|███▏ | 119538/371472 [9:30:49<19:13:56, 3.64it/s] 32%|███▏ | 119539/371472 [9:30:49<18:54:43, 3.70it/s] 32%|███▏ | 119540/371472 [9:30:49<20:24:28, 3.43it/s] {'loss': 3.3703, 'learning_rate': 7.107080523174384e-07, 'epoch': 5.15} + 32%|███▏ | 119540/371472 [9:30:49<20:24:28, 3.43it/s] 32%|███▏ | 119541/371472 [9:30:50<21:38:32, 3.23it/s] 32%|███▏ | 119542/371472 [9:30:50<20:56:08, 3.34it/s] 32%|███▏ | 119543/371472 [9:30:50<21:14:59, 3.29it/s] 32%|███▏ | 119544/371472 [9:30:50<20:41:27, 3.38it/s] 32%|███▏ | 119545/371472 [9:30:51<20:25:26, 3.43it/s] 32%|███▏ | 119546/371472 [9:30:51<20:02:35, 3.49it/s] 32%|███▏ | 119547/371472 [9:30:51<19:53:11, 3.52it/s] 32%|███▏ | 119548/371472 [9:30:52<19:24:52, 3.60it/s] 32%|███▏ | 119549/371472 [9:30:52<18:57:58, 3.69it/s] 32%|███▏ | 119550/371472 [9:30:52<18:57:00, 3.69it/s] 32%|███▏ | 119551/371472 [9:30:52<19:01:05, 3.68it/s] 32%|███▏ | 119552/371472 [9:30:53<19:05:22, 3.67it/s] 32%|███▏ | 119553/371472 [9:30:53<18:21:16, 3.81it/s] 32%|███▏ | 119554/371472 [9:30:53<18:22:32, 3.81it/s] 32%|███▏ | 119555/371472 [9:30:53<18:23:18, 3.81it/s] 32%|███▏ | 119556/371472 [9:30:54<18:34:09, 3.77it/s] 32%|███▏ | 119557/371472 [9:30:54<19:21:44, 3.61it/s] 32%|███▏ | 119558/371472 [9:30:54<19:15:54, 3.63it/s] 32%|███▏ | 119559/371472 [9:30:55<19:36:11, 3.57it/s] 32%|███▏ | 119560/371472 [9:30:55<18:55:26, 3.70it/s] {'loss': 3.2941, 'learning_rate': 7.106595703419595e-07, 'epoch': 5.15} + 32%|███▏ | 119560/371472 [9:30:55<18:55:26, 3.70it/s] 32%|███▏ | 119561/371472 [9:30:55<19:51:23, 3.52it/s] 32%|███▏ | 119562/371472 [9:30:55<19:27:05, 3.60it/s] 32%|███▏ | 119563/371472 [9:30:56<18:21:30, 3.81it/s] 32%|███▏ | 119564/371472 [9:30:56<20:11:57, 3.46it/s] 32%|███▏ | 119565/371472 [9:30:56<20:40:11, 3.39it/s] 32%|███▏ | 119566/371472 [9:30:57<21:32:13, 3.25it/s] 32%|███▏ | 119567/371472 [9:30:57<20:45:31, 3.37it/s] 32%|███▏ | 119568/371472 [9:30:57<20:10:14, 3.47it/s] 32%|███▏ | 119569/371472 [9:30:57<20:10:42, 3.47it/s] 32%|███▏ | 119570/371472 [9:30:58<19:21:57, 3.61it/s] 32%|███▏ | 119571/371472 [9:30:58<19:50:43, 3.53it/s] 32%|███▏ | 119572/371472 [9:30:58<21:00:15, 3.33it/s] 32%|███▏ | 119573/371472 [9:30:59<20:46:00, 3.37it/s] 32%|███▏ | 119574/371472 [9:30:59<19:31:30, 3.58it/s] 32%|███▏ | 119575/371472 [9:30:59<20:46:49, 3.37it/s] 32%|███▏ | 119576/371472 [9:30:59<20:06:30, 3.48it/s] 32%|███▏ | 119577/371472 [9:31:00<19:22:06, 3.61it/s] 32%|███▏ | 119578/371472 [9:31:00<18:37:34, 3.76it/s] 32%|███▏ | 119579/371472 [9:31:00<19:40:12, 3.56it/s] 32%|███▏ | 119580/371472 [9:31:01<20:44:53, 3.37it/s] {'loss': 3.0823, 'learning_rate': 7.106110883664806e-07, 'epoch': 5.15} + 32%|███▏ | 119580/371472 [9:31:01<20:44:53, 3.37it/s] 32%|███▏ | 119581/371472 [9:31:01<20:11:21, 3.47it/s] 32%|███▏ | 119582/371472 [9:31:01<19:31:31, 3.58it/s] 32%|███▏ | 119583/371472 [9:31:01<18:32:22, 3.77it/s] 32%|███▏ | 119584/371472 [9:31:02<18:36:26, 3.76it/s] 32%|███▏ | 119585/371472 [9:31:02<18:54:46, 3.70it/s] 32%|███▏ | 119586/371472 [9:31:02<19:14:37, 3.64it/s] 32%|███▏ | 119587/371472 [9:31:02<18:57:14, 3.69it/s] 32%|███▏ | 119588/371472 [9:31:03<18:25:05, 3.80it/s] 32%|███▏ | 119589/371472 [9:31:03<19:36:25, 3.57it/s] 32%|███▏ | 119590/371472 [9:31:03<19:23:54, 3.61it/s] 32%|███▏ | 119591/371472 [9:31:04<18:49:15, 3.72it/s] 32%|███▏ | 119592/371472 [9:31:04<18:27:26, 3.79it/s] 32%|███▏ | 119593/371472 [9:31:04<18:41:55, 3.74it/s] 32%|███▏ | 119594/371472 [9:31:04<18:03:07, 3.88it/s] 32%|███▏ | 119595/371472 [9:31:05<19:17:42, 3.63it/s] 32%|███▏ | 119596/371472 [9:31:05<19:22:54, 3.61it/s] 32%|███▏ | 119597/371472 [9:31:05<19:03:40, 3.67it/s] 32%|███▏ | 119598/371472 [9:31:05<18:56:41, 3.69it/s] 32%|███▏ | 119599/371472 [9:31:06<19:15:25, 3.63it/s] 32%|███▏ | 119600/371472 [9:31:06<19:32:00, 3.58it/s] {'loss': 3.1861, 'learning_rate': 7.105626063910017e-07, 'epoch': 5.15} + 32%|███▏ | 119600/371472 [9:31:06<19:32:00, 3.58it/s] 32%|███▏ | 119601/371472 [9:31:06<20:00:24, 3.50it/s] 32%|███▏ | 119602/371472 [9:31:07<20:07:47, 3.48it/s] 32%|███▏ | 119603/371472 [9:31:07<20:04:43, 3.48it/s] 32%|███▏ | 119604/371472 [9:31:07<19:45:08, 3.54it/s] 32%|███▏ | 119605/371472 [9:31:07<20:39:26, 3.39it/s] 32%|███▏ | 119606/371472 [9:31:08<20:44:03, 3.37it/s] 32%|███▏ | 119607/371472 [9:31:08<20:16:20, 3.45it/s] 32%|███▏ | 119608/371472 [9:31:08<20:41:21, 3.38it/s] 32%|███▏ | 119609/371472 [9:31:09<19:49:06, 3.53it/s] 32%|███▏ | 119610/371472 [9:31:09<24:15:15, 2.88it/s] 32%|███▏ | 119611/371472 [9:31:10<27:10:39, 2.57it/s] 32%|███▏ | 119612/371472 [9:31:10<24:05:27, 2.90it/s] 32%|███▏ | 119613/371472 [9:31:10<23:57:44, 2.92it/s] 32%|███▏ | 119614/371472 [9:31:10<21:49:05, 3.21it/s] 32%|███▏ | 119615/371472 [9:31:11<21:07:40, 3.31it/s] 32%|███▏ | 119616/371472 [9:31:11<19:47:32, 3.53it/s] 32%|███▏ | 119617/371472 [9:31:11<18:52:49, 3.71it/s] 32%|███▏ | 119618/371472 [9:31:11<19:33:20, 3.58it/s] 32%|███▏ | 119619/371472 [9:31:12<18:48:59, 3.72it/s] 32%|███▏ | 119620/371472 [9:31:12<18:21:49, 3.81it/s] {'loss': 3.2413, 'learning_rate': 7.105141244155228e-07, 'epoch': 5.15} + 32%|███▏ | 119620/371472 [9:31:12<18:21:49, 3.81it/s] 32%|███▏ | 119621/371472 [9:31:12<19:15:09, 3.63it/s] 32%|███▏ | 119622/371472 [9:31:13<20:09:53, 3.47it/s] 32%|███▏ | 119623/371472 [9:31:13<20:23:05, 3.43it/s] 32%|███▏ | 119624/371472 [9:31:13<19:49:44, 3.53it/s] 32%|███▏ | 119625/371472 [9:31:13<19:50:48, 3.52it/s] 32%|███▏ | 119626/371472 [9:31:14<19:25:24, 3.60it/s] 32%|███▏ | 119627/371472 [9:31:14<20:07:20, 3.48it/s] 32%|███▏ | 119628/371472 [9:31:14<20:02:09, 3.49it/s] 32%|███▏ | 119629/371472 [9:31:15<19:26:08, 3.60it/s] 32%|███▏ | 119630/371472 [9:31:15<19:21:01, 3.62it/s] 32%|███▏ | 119631/371472 [9:31:15<19:47:34, 3.53it/s] 32%|███▏ | 119632/371472 [9:31:15<19:02:13, 3.67it/s] 32%|███▏ | 119633/371472 [9:31:16<19:31:30, 3.58it/s] 32%|███▏ | 119634/371472 [9:31:16<19:21:02, 3.62it/s] 32%|███▏ | 119635/371472 [9:31:16<19:06:42, 3.66it/s] 32%|███▏ | 119636/371472 [9:31:16<19:08:10, 3.66it/s] 32%|███▏ | 119637/371472 [9:31:17<19:04:01, 3.67it/s] 32%|███▏ | 119638/371472 [9:31:17<19:18:49, 3.62it/s] 32%|███▏ | 119639/371472 [9:31:17<19:15:54, 3.63it/s] 32%|███▏ | 119640/371472 [9:31:18<18:26:59, 3.79it/s] {'loss': 3.289, 'learning_rate': 7.104656424400439e-07, 'epoch': 5.15} + 32%|███▏ | 119640/371472 [9:31:18<18:26:59, 3.79it/s] 32%|███▏ | 119641/371472 [9:31:18<19:04:19, 3.67it/s] 32%|███▏ | 119642/371472 [9:31:18<20:45:34, 3.37it/s] 32%|███▏ | 119643/371472 [9:31:18<21:09:15, 3.31it/s] 32%|███▏ | 119644/371472 [9:31:19<20:32:04, 3.41it/s] 32%|███▏ | 119645/371472 [9:31:19<19:26:51, 3.60it/s] 32%|███▏ | 119646/371472 [9:31:19<18:33:20, 3.77it/s] 32%|███▏ | 119647/371472 [9:31:19<18:04:11, 3.87it/s] 32%|███▏ | 119648/371472 [9:31:20<18:22:20, 3.81it/s] 32%|███▏ | 119649/371472 [9:31:20<18:09:48, 3.85it/s] 32%|███▏ | 119650/371472 [9:31:20<17:55:44, 3.90it/s] 32%|███▏ | 119651/371472 [9:31:21<19:37:07, 3.57it/s] 32%|███▏ | 119652/371472 [9:31:21<19:15:38, 3.63it/s] 32%|███▏ | 119653/371472 [9:31:21<19:46:59, 3.54it/s] 32%|███▏ | 119654/371472 [9:31:21<19:42:36, 3.55it/s] 32%|███▏ | 119655/371472 [9:31:22<18:57:30, 3.69it/s] 32%|███▏ | 119656/371472 [9:31:22<18:20:12, 3.81it/s] 32%|███▏ | 119657/371472 [9:31:22<17:40:05, 3.96it/s] 32%|███▏ | 119658/371472 [9:31:22<17:23:01, 4.02it/s] 32%|███▏ | 119659/371472 [9:31:23<18:48:00, 3.72it/s] 32%|███▏ | 119660/371472 [9:31:23<18:29:40, 3.78it/s] {'loss': 3.1453, 'learning_rate': 7.10417160464565e-07, 'epoch': 5.15} + 32%|███▏ | 119660/371472 [9:31:23<18:29:40, 3.78it/s] 32%|███▏ | 119661/371472 [9:31:23<20:15:43, 3.45it/s] 32%|███▏ | 119662/371472 [9:31:24<19:24:57, 3.60it/s] 32%|███▏ | 119663/371472 [9:31:24<19:09:27, 3.65it/s] 32%|███▏ | 119664/371472 [9:31:24<19:56:58, 3.51it/s] 32%|███▏ | 119665/371472 [9:31:24<19:37:49, 3.56it/s] 32%|███▏ | 119666/371472 [9:31:25<18:33:26, 3.77it/s] 32%|███▏ | 119667/371472 [9:31:25<18:14:14, 3.84it/s] 32%|███▏ | 119668/371472 [9:31:25<18:48:23, 3.72it/s] 32%|███▏ | 119669/371472 [9:31:25<18:44:33, 3.73it/s] 32%|███▏ | 119670/371472 [9:31:26<18:25:53, 3.79it/s] 32%|███▏ | 119671/371472 [9:31:26<18:27:27, 3.79it/s] 32%|███▏ | 119672/371472 [9:31:26<18:08:37, 3.85it/s] 32%|███▏ | 119673/371472 [9:31:27<19:24:19, 3.60it/s] 32%|███▏ | 119674/371472 [9:31:27<19:44:18, 3.54it/s] 32%|███▏ | 119675/371472 [9:31:27<20:07:49, 3.47it/s] 32%|███▏ | 119676/371472 [9:31:27<19:25:14, 3.60it/s] 32%|███▏ | 119677/371472 [9:31:28<20:03:03, 3.49it/s] 32%|███▏ | 119678/371472 [9:31:28<19:33:48, 3.58it/s] 32%|███▏ | 119679/371472 [9:31:28<20:11:44, 3.46it/s] 32%|███▏ | 119680/371472 [9:31:29<20:04:28, 3.48it/s] {'loss': 3.3127, 'learning_rate': 7.103686784890861e-07, 'epoch': 5.15} + 32%|███▏ | 119680/371472 [9:31:29<20:04:28, 3.48it/s] 32%|███▏ | 119681/371472 [9:31:29<21:15:02, 3.29it/s] 32%|███▏ | 119682/371472 [9:31:29<21:40:24, 3.23it/s] 32%|███▏ | 119683/371472 [9:31:29<20:24:32, 3.43it/s] 32%|███▏ | 119684/371472 [9:31:30<19:36:50, 3.57it/s] 32%|███▏ | 119685/371472 [9:31:30<20:08:34, 3.47it/s] 32%|███▏ | 119686/371472 [9:31:30<21:12:39, 3.30it/s] 32%|███▏ | 119687/371472 [9:31:31<20:54:28, 3.35it/s] 32%|███▏ | 119688/371472 [9:31:31<21:30:48, 3.25it/s] 32%|███▏ | 119689/371472 [9:31:31<20:40:47, 3.38it/s] 32%|███▏ | 119690/371472 [9:31:31<20:08:01, 3.47it/s] 32%|███▏ | 119691/371472 [9:31:32<20:17:43, 3.45it/s] 32%|███▏ | 119692/371472 [9:31:32<20:08:12, 3.47it/s] 32%|███▏ | 119693/371472 [9:31:32<19:28:08, 3.59it/s] 32%|███▏ | 119694/371472 [9:31:33<19:14:21, 3.64it/s] 32%|███▏ | 119695/371472 [9:31:33<19:25:07, 3.60it/s] 32%|███▏ | 119696/371472 [9:31:33<19:31:19, 3.58it/s] 32%|███▏ | 119697/371472 [9:31:33<19:32:07, 3.58it/s] 32%|███▏ | 119698/371472 [9:31:34<18:43:59, 3.73it/s] 32%|███▏ | 119699/371472 [9:31:34<18:28:02, 3.79it/s] 32%|███▏ | 119700/371472 [9:31:34<18:47:58, 3.72it/s] {'loss': 3.253, 'learning_rate': 7.103201965136073e-07, 'epoch': 5.16} + 32%|███▏ | 119700/371472 [9:31:34<18:47:58, 3.72it/s] 32%|███▏ | 119701/371472 [9:31:34<18:21:07, 3.81it/s] 32%|███▏ | 119702/371472 [9:31:35<17:41:44, 3.95it/s] 32%|███▏ | 119703/371472 [9:31:35<19:14:11, 3.64it/s] 32%|███▏ | 119704/371472 [9:31:35<18:20:18, 3.81it/s] 32%|███▏ | 119705/371472 [9:31:36<18:12:53, 3.84it/s] 32%|███▏ | 119706/371472 [9:31:36<17:30:03, 4.00it/s] 32%|███▏ | 119707/371472 [9:31:36<17:45:53, 3.94it/s] 32%|███▏ | 119708/371472 [9:31:36<19:32:17, 3.58it/s] 32%|███▏ | 119709/371472 [9:31:37<19:10:49, 3.65it/s] 32%|███▏ | 119710/371472 [9:31:37<19:05:55, 3.66it/s] 32%|███▏ | 119711/371472 [9:31:37<19:56:17, 3.51it/s] 32%|███▏ | 119712/371472 [9:31:37<20:08:17, 3.47it/s] 32%|███▏ | 119713/371472 [9:31:38<19:19:56, 3.62it/s] 32%|███▏ | 119714/371472 [9:31:38<18:53:34, 3.70it/s] 32%|███▏ | 119715/371472 [9:31:38<18:30:01, 3.78it/s] 32%|███▏ | 119716/371472 [9:31:39<19:28:36, 3.59it/s] 32%|███▏ | 119717/371472 [9:31:39<19:00:58, 3.68it/s] 32%|███▏ | 119718/371472 [9:31:39<19:30:29, 3.58it/s] 32%|███▏ | 119719/371472 [9:31:39<20:20:38, 3.44it/s] 32%|███▏ | 119720/371472 [9:31:40<20:18:03, 3.44it/s] {'loss': 3.4088, 'learning_rate': 7.102717145381284e-07, 'epoch': 5.16} + 32%|███▏ | 119720/371472 [9:31:40<20:18:03, 3.44it/s] 32%|███▏ | 119721/371472 [9:31:40<20:58:24, 3.33it/s] 32%|███▏ | 119722/371472 [9:31:40<22:16:38, 3.14it/s] 32%|███▏ | 119723/371472 [9:31:41<20:53:11, 3.35it/s] 32%|███▏ | 119724/371472 [9:31:41<23:19:54, 3.00it/s] 32%|███▏ | 119725/371472 [9:31:41<22:22:41, 3.12it/s] 32%|███▏ | 119726/371472 [9:31:42<22:39:42, 3.09it/s] 32%|███▏ | 119727/371472 [9:31:42<22:51:15, 3.06it/s] 32%|███▏ | 119728/371472 [9:31:42<21:43:53, 3.22it/s] 32%|███▏ | 119729/371472 [9:31:43<21:56:57, 3.19it/s] 32%|███▏ | 119730/371472 [9:31:43<21:26:06, 3.26it/s] 32%|███▏ | 119731/371472 [9:31:43<20:51:04, 3.35it/s] 32%|███▏ | 119732/371472 [9:31:43<20:55:21, 3.34it/s] 32%|███▏ | 119733/371472 [9:31:44<21:27:53, 3.26it/s] 32%|███▏ | 119734/371472 [9:31:44<20:31:17, 3.41it/s] 32%|███▏ | 119735/371472 [9:31:44<19:14:04, 3.64it/s] 32%|███▏ | 119736/371472 [9:31:45<19:47:53, 3.53it/s] 32%|███▏ | 119737/371472 [9:31:45<19:47:12, 3.53it/s] 32%|███▏ | 119738/371472 [9:31:45<19:22:49, 3.61it/s] 32%|███▏ | 119739/371472 [9:31:45<20:09:26, 3.47it/s] 32%|███▏ | 119740/371472 [9:31:46<19:18:28, 3.62it/s] {'loss': 3.1496, 'learning_rate': 7.102232325626494e-07, 'epoch': 5.16} + 32%|███▏ | 119740/371472 [9:31:46<19:18:28, 3.62it/s] 32%|███▏ | 119741/371472 [9:31:46<19:00:25, 3.68it/s] 32%|███▏ | 119742/371472 [9:31:46<18:50:29, 3.71it/s] 32%|███▏ | 119743/371472 [9:31:47<20:05:07, 3.48it/s] 32%|███▏ | 119744/371472 [9:31:47<19:55:34, 3.51it/s] 32%|███▏ | 119745/371472 [9:31:47<20:55:18, 3.34it/s] 32%|███▏ | 119746/371472 [9:31:47<20:20:57, 3.44it/s] 32%|███▏ | 119747/371472 [9:31:48<19:46:30, 3.54it/s] 32%|███▏ | 119748/371472 [9:31:48<21:25:49, 3.26it/s] 32%|███▏ | 119749/371472 [9:31:48<20:51:17, 3.35it/s] 32%|███▏ | 119750/371472 [9:31:49<22:30:00, 3.11it/s] 32%|███▏ | 119751/371472 [9:31:49<21:58:12, 3.18it/s] 32%|███▏ | 119752/371472 [9:31:49<21:59:28, 3.18it/s] 32%|███▏ | 119753/371472 [9:31:50<22:13:58, 3.14it/s] 32%|███▏ | 119754/371472 [9:31:50<20:48:57, 3.36it/s] 32%|███▏ | 119755/371472 [9:31:50<20:23:50, 3.43it/s] 32%|███▏ | 119756/371472 [9:31:50<20:00:11, 3.50it/s] 32%|███▏ | 119757/371472 [9:31:51<19:18:12, 3.62it/s] 32%|███▏ | 119758/371472 [9:31:51<20:07:44, 3.47it/s] 32%|███▏ | 119759/371472 [9:31:51<19:37:03, 3.56it/s] 32%|███▏ | 119760/371472 [9:31:52<19:51:16, 3.52it/s] {'loss': 3.1972, 'learning_rate': 7.101747505871705e-07, 'epoch': 5.16} + 32%|███▏ | 119760/371472 [9:31:52<19:51:16, 3.52it/s] 32%|███▏ | 119761/371472 [9:31:52<21:03:22, 3.32it/s] 32%|███▏ | 119762/371472 [9:31:52<20:00:22, 3.49it/s] 32%|███▏ | 119763/371472 [9:31:53<20:56:54, 3.34it/s] 32%|███▏ | 119764/371472 [9:31:53<21:08:58, 3.31it/s] 32%|███▏ | 119765/371472 [9:31:53<20:14:54, 3.45it/s] 32%|███▏ | 119766/371472 [9:31:53<19:26:07, 3.60it/s] 32%|███▏ | 119767/371472 [9:31:54<19:01:26, 3.68it/s] 32%|███▏ | 119768/371472 [9:31:54<20:28:16, 3.42it/s] 32%|███▏ | 119769/371472 [9:31:54<20:47:24, 3.36it/s] 32%|███▏ | 119770/371472 [9:31:54<19:59:29, 3.50it/s] 32%|███▏ | 119771/371472 [9:31:55<19:32:40, 3.58it/s] 32%|███▏ | 119772/371472 [9:31:55<18:53:26, 3.70it/s] 32%|███▏ | 119773/371472 [9:31:55<18:50:14, 3.71it/s] 32%|███▏ | 119774/371472 [9:31:56<18:22:35, 3.80it/s] 32%|███▏ | 119775/371472 [9:31:56<18:50:13, 3.71it/s] 32%|███▏ | 119776/371472 [9:31:56<18:25:18, 3.80it/s] 32%|███▏ | 119777/371472 [9:31:56<18:03:19, 3.87it/s] 32%|███▏ | 119778/371472 [9:31:57<19:23:06, 3.61it/s] 32%|███▏ | 119779/371472 [9:31:57<19:14:10, 3.63it/s] 32%|███▏ | 119780/371472 [9:31:57<19:07:45, 3.65it/s] {'loss': 3.3144, 'learning_rate': 7.101262686116917e-07, 'epoch': 5.16} + 32%|███▏ | 119780/371472 [9:31:57<19:07:45, 3.65it/s] 32%|███▏ | 119781/371472 [9:31:57<20:01:32, 3.49it/s] 32%|███▏ | 119782/371472 [9:31:58<19:10:33, 3.65it/s] 32%|███▏ | 119783/371472 [9:31:58<19:09:15, 3.65it/s] 32%|███▏ | 119784/371472 [9:31:58<19:36:27, 3.57it/s] 32%|███▏ | 119785/371472 [9:31:59<20:12:30, 3.46it/s] 32%|███▏ | 119786/371472 [9:31:59<20:03:38, 3.49it/s] 32%|███▏ | 119787/371472 [9:31:59<19:39:28, 3.56it/s] 32%|███▏ | 119788/371472 [9:31:59<19:28:33, 3.59it/s] 32%|███▏ | 119789/371472 [9:32:00<18:40:44, 3.74it/s] 32%|███▏ | 119790/371472 [9:32:00<18:38:15, 3.75it/s] 32%|███▏ | 119791/371472 [9:32:00<19:11:32, 3.64it/s] 32%|███▏ | 119792/371472 [9:32:01<20:10:51, 3.46it/s] 32%|███▏ | 119793/371472 [9:32:01<19:14:16, 3.63it/s] 32%|███▏ | 119794/371472 [9:32:01<19:03:50, 3.67it/s] 32%|███▏ | 119795/371472 [9:32:01<22:07:16, 3.16it/s] 32%|███▏ | 119796/371472 [9:32:02<20:42:20, 3.38it/s] 32%|███▏ | 119797/371472 [9:32:02<21:23:24, 3.27it/s] 32%|███▏ | 119798/371472 [9:32:02<20:41:19, 3.38it/s] 32%|███▏ | 119799/371472 [9:32:03<20:08:47, 3.47it/s] 32%|███▏ | 119800/371472 [9:32:03<22:24:40, 3.12it/s] {'loss': 3.2915, 'learning_rate': 7.100777866362128e-07, 'epoch': 5.16} + 32%|███▏ | 119800/371472 [9:32:03<22:24:40, 3.12it/s] 32%|███▏ | 119801/371472 [9:32:03<22:56:18, 3.05it/s] 32%|███▏ | 119802/371472 [9:32:04<21:35:02, 3.24it/s] 32%|███▏ | 119803/371472 [9:32:04<21:28:06, 3.26it/s] 32%|███▏ | 119804/371472 [9:32:04<20:15:50, 3.45it/s] 32%|███▏ | 119805/371472 [9:32:04<20:30:25, 3.41it/s] 32%|███▏ | 119806/371472 [9:32:05<20:23:25, 3.43it/s] 32%|███▏ | 119807/371472 [9:32:05<20:07:38, 3.47it/s] 32%|███▏ | 119808/371472 [9:32:05<19:24:33, 3.60it/s] 32%|███▏ | 119809/371472 [9:32:06<20:17:08, 3.45it/s] 32%|███▏ | 119810/371472 [9:32:06<19:23:47, 3.60it/s] 32%|███▏ | 119811/371472 [9:32:06<18:55:39, 3.69it/s] 32%|███▏ | 119812/371472 [9:32:06<19:10:16, 3.65it/s] 32%|███▏ | 119813/371472 [9:32:07<18:42:51, 3.74it/s] 32%|███▏ | 119814/371472 [9:32:07<18:47:06, 3.72it/s] 32%|███▏ | 119815/371472 [9:32:07<19:28:49, 3.59it/s] 32%|███▏ | 119816/371472 [9:32:08<22:16:42, 3.14it/s] 32%|███▏ | 119817/371472 [9:32:08<21:44:18, 3.22it/s] 32%|███▏ | 119818/371472 [9:32:08<21:23:38, 3.27it/s] 32%|███▏ | 119819/371472 [9:32:08<20:03:00, 3.49it/s] 32%|███▏ | 119820/371472 [9:32:09<20:24:26, 3.43it/s] {'loss': 3.2987, 'learning_rate': 7.100293046607338e-07, 'epoch': 5.16} + 32%|███▏ | 119820/371472 [9:32:09<20:24:26, 3.43it/s] 32%|███▏ | 119821/371472 [9:32:09<19:46:28, 3.53it/s] 32%|███▏ | 119822/371472 [9:32:09<19:20:16, 3.61it/s] 32%|███▏ | 119823/371472 [9:32:10<19:50:08, 3.52it/s] 32%|███▏ | 119824/371472 [9:32:10<20:07:16, 3.47it/s] 32%|███▏ | 119825/371472 [9:32:10<22:11:40, 3.15it/s] 32%|███▏ | 119826/371472 [9:32:11<21:06:19, 3.31it/s] 32%|███▏ | 119827/371472 [9:32:11<21:27:03, 3.26it/s] 32%|███▏ | 119828/371472 [9:32:11<21:48:30, 3.21it/s] 32%|███▏ | 119829/371472 [9:32:11<21:04:00, 3.32it/s] 32%|███▏ | 119830/371472 [9:32:12<21:41:13, 3.22it/s] 32%|███▏ | 119831/371472 [9:32:12<20:56:33, 3.34it/s] 32%|███▏ | 119832/371472 [9:32:12<20:54:35, 3.34it/s] 32%|███▏ | 119833/371472 [9:32:13<19:33:45, 3.57it/s] 32%|███▏ | 119834/371472 [9:32:13<21:17:36, 3.28it/s] 32%|███▏ | 119835/371472 [9:32:13<20:38:30, 3.39it/s] 32%|███▏ | 119836/371472 [9:32:13<20:06:19, 3.48it/s] 32%|███▏ | 119837/371472 [9:32:14<21:25:01, 3.26it/s] 32%|███▏ | 119838/371472 [9:32:14<21:23:34, 3.27it/s] 32%|███▏ | 119839/371472 [9:32:14<22:08:15, 3.16it/s] 32%|███▏ | 119840/371472 [9:32:15<21:56:36, 3.19it/s] {'loss': 3.3093, 'learning_rate': 7.09980822685255e-07, 'epoch': 5.16} + 32%|███▏ | 119840/371472 [9:32:15<21:56:36, 3.19it/s] 32%|███▏ | 119841/371472 [9:32:15<20:15:53, 3.45it/s] 32%|███▏ | 119842/371472 [9:32:15<20:33:49, 3.40it/s] 32%|███▏ | 119843/371472 [9:32:16<21:08:08, 3.31it/s] 32%|███▏ | 119844/371472 [9:32:16<20:19:07, 3.44it/s] 32%|███▏ | 119845/371472 [9:32:16<20:23:47, 3.43it/s] 32%|███▏ | 119846/371472 [9:32:16<20:12:02, 3.46it/s] 32%|███▏ | 119847/371472 [9:32:17<21:57:43, 3.18it/s] 32%|███▏ | 119848/371472 [9:32:17<21:54:25, 3.19it/s] 32%|███▏ | 119849/371472 [9:32:17<21:28:05, 3.26it/s] 32%|███▏ | 119850/371472 [9:32:18<20:36:04, 3.39it/s] 32%|███▏ | 119851/371472 [9:32:18<19:48:39, 3.53it/s] 32%|███▏ | 119852/371472 [9:32:18<20:02:38, 3.49it/s] 32%|███▏ | 119853/371472 [9:32:19<19:43:33, 3.54it/s] 32%|███▏ | 119854/371472 [9:32:19<20:26:56, 3.42it/s] 32%|███▏ | 119855/371472 [9:32:19<20:15:12, 3.45it/s] 32%|███▏ | 119856/371472 [9:32:19<19:49:50, 3.52it/s] 32%|███▏ | 119857/371472 [9:32:20<19:34:08, 3.57it/s] 32%|███▏ | 119858/371472 [9:32:20<19:24:07, 3.60it/s] 32%|███▏ | 119859/371472 [9:32:20<18:35:22, 3.76it/s] 32%|███▏ | 119860/371472 [9:32:20<18:49:36, 3.71it/s] {'loss': 3.0692, 'learning_rate': 7.099323407097761e-07, 'epoch': 5.16} + 32%|███▏ | 119860/371472 [9:32:20<18:49:36, 3.71it/s] 32%|███▏ | 119861/371472 [9:32:21<18:19:25, 3.81it/s] 32%|███▏ | 119862/371472 [9:32:21<17:54:04, 3.90it/s] 32%|███▏ | 119863/371472 [9:32:21<19:32:01, 3.58it/s] 32%|███▏ | 119864/371472 [9:32:22<18:51:11, 3.71it/s] 32%|███▏ | 119865/371472 [9:32:22<18:49:25, 3.71it/s] 32%|███▏ | 119866/371472 [9:32:22<18:29:37, 3.78it/s] 32%|███▏ | 119867/371472 [9:32:22<19:19:52, 3.62it/s] 32%|███▏ | 119868/371472 [9:32:23<20:56:08, 3.34it/s] 32%|███▏ | 119869/371472 [9:32:23<20:11:17, 3.46it/s] 32%|███▏ | 119870/371472 [9:32:23<20:35:38, 3.39it/s] 32%|███▏ | 119871/371472 [9:32:24<19:54:45, 3.51it/s] 32%|███▏ | 119872/371472 [9:32:24<20:04:27, 3.48it/s] 32%|███▏ | 119873/371472 [9:32:24<19:22:16, 3.61it/s] 32%|███▏ | 119874/371472 [9:32:25<22:46:04, 3.07it/s] 32%|███▏ | 119875/371472 [9:32:25<21:57:52, 3.18it/s] 32%|███▏ | 119876/371472 [9:32:25<21:13:21, 3.29it/s] 32%|███▏ | 119877/371472 [9:32:25<20:26:10, 3.42it/s] 32%|███▏ | 119878/371472 [9:32:26<20:30:41, 3.41it/s] 32%|███▏ | 119879/371472 [9:32:26<19:33:36, 3.57it/s] 32%|███▏ | 119880/371472 [9:32:26<19:14:46, 3.63it/s] {'loss': 3.2909, 'learning_rate': 7.098838587342972e-07, 'epoch': 5.16} + 32%|███▏ | 119880/371472 [9:32:26<19:14:46, 3.63it/s] 32%|███▏ | 119881/371472 [9:32:26<19:05:52, 3.66it/s] 32%|███▏ | 119882/371472 [9:32:27<19:30:28, 3.58it/s] 32%|███▏ | 119883/371472 [9:32:27<20:02:28, 3.49it/s] 32%|███▏ | 119884/371472 [9:32:27<19:24:18, 3.60it/s] 32%|███▏ | 119885/371472 [9:32:28<19:55:05, 3.51it/s] 32%|███▏ | 119886/371472 [9:32:28<20:03:29, 3.48it/s] 32%|███▏ | 119887/371472 [9:32:28<19:42:23, 3.55it/s] 32%|███▏ | 119888/371472 [9:32:28<18:49:31, 3.71it/s] 32%|███▏ | 119889/371472 [9:32:29<19:02:41, 3.67it/s] 32%|███▏ | 119890/371472 [9:32:29<19:15:41, 3.63it/s] 32%|███▏ | 119891/371472 [9:32:29<18:17:24, 3.82it/s] 32%|███▏ | 119892/371472 [9:32:29<18:21:38, 3.81it/s] 32%|███▏ | 119893/371472 [9:32:30<18:54:48, 3.69it/s] 32%|███▏ | 119894/371472 [9:32:30<19:06:45, 3.66it/s] 32%|███▏ | 119895/371472 [9:32:30<19:21:28, 3.61it/s] 32%|███▏ | 119896/371472 [9:32:31<19:25:17, 3.60it/s] 32%|███▏ | 119897/371472 [9:32:31<18:52:02, 3.70it/s] 32%|███▏ | 119898/371472 [9:32:31<19:12:40, 3.64it/s] 32%|███▏ | 119899/371472 [9:32:31<18:36:59, 3.75it/s] 32%|███▏ | 119900/371472 [9:32:32<19:46:56, 3.53it/s] {'loss': 3.1687, 'learning_rate': 7.098353767588182e-07, 'epoch': 5.16} + 32%|███▏ | 119900/371472 [9:32:32<19:46:56, 3.53it/s] 32%|███▏ | 119901/371472 [9:32:32<19:06:34, 3.66it/s] 32%|███▏ | 119902/371472 [9:32:32<19:06:27, 3.66it/s] 32%|███▏ | 119903/371472 [9:32:33<19:44:18, 3.54it/s] 32%|███▏ | 119904/371472 [9:32:33<20:11:02, 3.46it/s] 32%|███▏ | 119905/371472 [9:32:33<20:12:28, 3.46it/s] 32%|███▏ | 119906/371472 [9:32:33<19:54:28, 3.51it/s] 32%|███▏ | 119907/371472 [9:32:34<18:58:29, 3.68it/s] 32%|███▏ | 119908/371472 [9:32:34<20:00:31, 3.49it/s] 32%|███▏ | 119909/371472 [9:32:34<19:56:59, 3.50it/s] 32%|███▏ | 119910/371472 [9:32:35<18:59:36, 3.68it/s] 32%|███▏ | 119911/371472 [9:32:35<19:47:51, 3.53it/s] 32%|███▏ | 119912/371472 [9:32:35<19:13:48, 3.63it/s] 32%|███▏ | 119913/371472 [9:32:35<18:39:14, 3.75it/s] 32%|███▏ | 119914/371472 [9:32:36<20:19:51, 3.44it/s] 32%|███▏ | 119915/371472 [9:32:36<20:40:47, 3.38it/s] 32%|███▏ | 119916/371472 [9:32:36<22:04:15, 3.17it/s] 32%|███▏ | 119917/371472 [9:32:37<20:56:04, 3.34it/s] 32%|███▏ | 119918/371472 [9:32:37<20:18:51, 3.44it/s] 32%|███▏ | 119919/371472 [9:32:37<20:56:49, 3.34it/s] 32%|███▏ | 119920/371472 [9:32:38<21:21:32, 3.27it/s] {'loss': 3.2103, 'learning_rate': 7.097868947833394e-07, 'epoch': 5.17} + 32%|███▏ | 119920/371472 [9:32:38<21:21:32, 3.27it/s] 32%|███▏ | 119921/371472 [9:32:38<21:44:39, 3.21it/s] 32%|███▏ | 119922/371472 [9:32:38<21:05:42, 3.31it/s] 32%|███▏ | 119923/371472 [9:32:38<21:31:35, 3.25it/s] 32%|███▏ | 119924/371472 [9:32:39<20:47:19, 3.36it/s] 32%|███▏ | 119925/371472 [9:32:39<20:10:26, 3.46it/s] 32%|███▏ | 119926/371472 [9:32:39<19:40:24, 3.55it/s] 32%|███▏ | 119927/371472 [9:32:39<19:08:28, 3.65it/s] 32%|███▏ | 119928/371472 [9:32:40<18:55:08, 3.69it/s] 32%|███▏ | 119929/371472 [9:32:40<19:51:30, 3.52it/s] 32%|███▏ | 119930/371472 [9:32:40<19:48:05, 3.53it/s] 32%|███▏ | 119931/371472 [9:32:41<19:28:30, 3.59it/s] 32%|███▏ | 119932/371472 [9:32:41<20:55:26, 3.34it/s] 32%|███▏ | 119933/371472 [9:32:41<19:34:10, 3.57it/s] 32%|███▏ | 119934/371472 [9:32:41<19:47:27, 3.53it/s] 32%|███▏ | 119935/371472 [9:32:42<19:25:41, 3.60it/s] 32%|███▏ | 119936/371472 [9:32:42<20:08:57, 3.47it/s] 32%|███▏ | 119937/371472 [9:32:42<19:25:45, 3.60it/s] 32%|███▏ | 119938/371472 [9:32:43<19:21:23, 3.61it/s] 32%|███▏ | 119939/371472 [9:32:43<19:48:37, 3.53it/s] 32%|███▏ | 119940/371472 [9:32:43<20:53:16, 3.34it/s] {'loss': 3.1913, 'learning_rate': 7.097384128078606e-07, 'epoch': 5.17} + 32%|███▏ | 119940/371472 [9:32:43<20:53:16, 3.34it/s] 32%|███▏ | 119941/371472 [9:32:44<22:47:45, 3.07it/s] 32%|███▏ | 119942/371472 [9:32:44<21:22:28, 3.27it/s] 32%|███▏ | 119943/371472 [9:32:44<21:19:28, 3.28it/s] 32%|███▏ | 119944/371472 [9:32:44<19:45:59, 3.53it/s] 32%|███▏ | 119945/371472 [9:32:45<20:08:03, 3.47it/s] 32%|███▏ | 119946/371472 [9:32:45<21:03:26, 3.32it/s] 32%|███▏ | 119947/371472 [9:32:45<21:13:55, 3.29it/s] 32%|███▏ | 119948/371472 [9:32:46<21:02:20, 3.32it/s] 32%|███▏ | 119949/371472 [9:32:46<20:03:52, 3.48it/s] 32%|███▏ | 119950/371472 [9:32:46<19:44:08, 3.54it/s] 32%|███▏ | 119951/371472 [9:32:46<19:39:21, 3.55it/s] 32%|███▏ | 119952/371472 [9:32:47<19:20:10, 3.61it/s] 32%|███▏ | 119953/371472 [9:32:47<19:17:24, 3.62it/s] 32%|███▏ | 119954/371472 [9:32:47<20:00:44, 3.49it/s] 32%|███▏ | 119955/371472 [9:32:48<19:12:20, 3.64it/s] 32%|███▏ | 119956/371472 [9:32:48<20:16:56, 3.44it/s] 32%|███▏ | 119957/371472 [9:32:48<21:17:51, 3.28it/s] 32%|███▏ | 119958/371472 [9:32:49<21:02:13, 3.32it/s] 32%|███▏ | 119959/371472 [9:32:49<20:22:26, 3.43it/s] 32%|███▏ | 119960/371472 [9:32:49<19:49:39, 3.52it/s] {'loss': 3.2558, 'learning_rate': 7.096899308323817e-07, 'epoch': 5.17} + 32%|███▏ | 119960/371472 [9:32:49<19:49:39, 3.52it/s] 32%|███▏ | 119961/371472 [9:32:49<18:43:04, 3.73it/s] 32%|███▏ | 119962/371472 [9:32:50<19:58:18, 3.50it/s] 32%|███▏ | 119963/371472 [9:32:50<19:59:00, 3.50it/s] 32%|███▏ | 119964/371472 [9:32:50<19:50:57, 3.52it/s] 32%|███▏ | 119965/371472 [9:32:50<19:27:27, 3.59it/s] 32%|███▏ | 119966/371472 [9:32:51<19:18:21, 3.62it/s] 32%|███▏ | 119967/371472 [9:32:51<18:50:37, 3.71it/s] 32%|███▏ | 119968/371472 [9:32:51<18:34:58, 3.76it/s] 32%|███▏ | 119969/371472 [9:32:51<18:40:09, 3.74it/s] 32%|███▏ | 119970/371472 [9:32:52<18:17:57, 3.82it/s] 32%|███▏ | 119971/371472 [9:32:52<17:59:19, 3.88it/s] 32%|███▏ | 119972/371472 [9:32:52<19:09:27, 3.65it/s] 32%|███▏ | 119973/371472 [9:32:53<22:05:41, 3.16it/s] 32%|███▏ | 119974/371472 [9:32:53<22:00:15, 3.17it/s] 32%|███▏ | 119975/371472 [9:32:53<20:53:40, 3.34it/s] 32%|███▏ | 119976/371472 [9:32:54<19:47:07, 3.53it/s] 32%|███▏ | 119977/371472 [9:32:54<19:45:21, 3.54it/s] 32%|███▏ | 119978/371472 [9:32:54<20:06:38, 3.47it/s] 32%|███▏ | 119979/371472 [9:32:54<20:00:53, 3.49it/s] 32%|███▏ | 119980/371472 [9:32:55<20:23:42, 3.43it/s] {'loss': 3.4224, 'learning_rate': 7.096414488569027e-07, 'epoch': 5.17} + 32%|███▏ | 119980/371472 [9:32:55<20:23:42, 3.43it/s] 32%|███▏ | 119981/371472 [9:32:55<20:34:11, 3.40it/s] 32%|███▏ | 119982/371472 [9:32:55<20:52:44, 3.35it/s] 32%|███▏ | 119983/371472 [9:32:56<19:37:27, 3.56it/s] 32%|███▏ | 119984/371472 [9:32:56<19:45:23, 3.54it/s] 32%|███▏ | 119985/371472 [9:32:56<18:58:37, 3.68it/s] 32%|███▏ | 119986/371472 [9:32:56<19:00:54, 3.67it/s] 32%|███▏ | 119987/371472 [9:32:57<18:40:54, 3.74it/s] 32%|███▏ | 119988/371472 [9:32:57<18:34:46, 3.76it/s] 32%|███▏ | 119989/371472 [9:32:57<18:52:28, 3.70it/s] 32%|███▏ | 119990/371472 [9:32:58<20:21:55, 3.43it/s] 32%|███▏ | 119991/371472 [9:32:58<20:04:06, 3.48it/s] 32%|███▏ | 119992/371472 [9:32:58<19:45:36, 3.54it/s] 32%|███▏ | 119993/371472 [9:32:58<19:46:57, 3.53it/s] 32%|███▏ | 119994/371472 [9:32:59<20:22:00, 3.43it/s] 32%|███▏ | 119995/371472 [9:32:59<19:31:34, 3.58it/s] 32%|███▏ | 119996/371472 [9:32:59<18:57:22, 3.69it/s] 32%|███▏ | 119997/371472 [9:32:59<19:27:35, 3.59it/s] 32%|███▏ | 119998/371472 [9:33:00<19:35:29, 3.57it/s] 32%|███▏ | 119999/371472 [9:33:00<18:49:08, 3.71it/s] 32%|███▏ | 120000/371472 [9:33:00<19:12:38, 3.64it/s] {'loss': 3.4945, 'learning_rate': 7.095929668814238e-07, 'epoch': 5.17} + 32%|███▏ | 120000/371472 [9:33:00<19:12:38, 3.64it/s] 32%|███▏ | 120001/371472 [9:33:01<19:33:27, 3.57it/s] 32%|███▏ | 120002/371472 [9:33:01<18:56:48, 3.69it/s] 32%|███▏ | 120003/371472 [9:33:01<18:37:09, 3.75it/s] 32%|███▏ | 120004/371472 [9:33:01<18:04:25, 3.86it/s] 32%|███▏ | 120005/371472 [9:33:02<18:33:06, 3.77it/s] 32%|███▏ | 120006/371472 [9:33:02<18:14:05, 3.83it/s] 32%|███▏ | 120007/371472 [9:33:02<17:36:41, 3.97it/s] 32%|███▏ | 120008/371472 [9:33:02<18:03:04, 3.87it/s] 32%|███▏ | 120009/371472 [9:33:03<17:48:41, 3.92it/s] 32%|███▏ | 120010/371472 [9:33:03<18:14:28, 3.83it/s] 32%|███▏ | 120011/371472 [9:33:03<18:23:13, 3.80it/s] 32%|███▏ | 120012/371472 [9:33:03<20:02:20, 3.49it/s] 32%|███▏ | 120013/371472 [9:33:04<19:35:33, 3.57it/s] 32%|███▏ | 120014/371472 [9:33:04<19:25:32, 3.60it/s] 32%|███▏ | 120015/371472 [9:33:04<18:34:40, 3.76it/s] 32%|███▏ | 120016/371472 [9:33:05<19:26:21, 3.59it/s] 32%|███▏ | 120017/371472 [9:33:05<18:48:32, 3.71it/s] 32%|███▏ | 120018/371472 [9:33:05<18:22:33, 3.80it/s] 32%|███▏ | 120019/371472 [9:33:05<17:48:08, 3.92it/s] 32%|███▏ | 120020/371472 [9:33:06<17:54:08, 3.90it/s] {'loss': 3.4162, 'learning_rate': 7.09544484905945e-07, 'epoch': 5.17} + 32%|███▏ | 120020/371472 [9:33:06<17:54:08, 3.90it/s] 32%|███▏ | 120021/371472 [9:33:06<18:14:59, 3.83it/s] 32%|���██▏ | 120022/371472 [9:33:06<18:18:27, 3.82it/s] 32%|███▏ | 120023/371472 [9:33:06<18:44:39, 3.73it/s] 32%|███▏ | 120024/371472 [9:33:07<18:25:13, 3.79it/s] 32%|███▏ | 120025/371472 [9:33:07<19:13:25, 3.63it/s] 32%|███▏ | 120026/371472 [9:33:07<19:35:49, 3.56it/s] 32%|███▏ | 120027/371472 [9:33:07<19:20:48, 3.61it/s] 32%|███▏ | 120028/371472 [9:33:08<19:57:45, 3.50it/s] 32%|███▏ | 120029/371472 [9:33:08<19:09:12, 3.65it/s] 32%|███▏ | 120030/371472 [9:33:08<19:19:38, 3.61it/s] 32%|███▏ | 120031/371472 [9:33:09<21:26:50, 3.26it/s] 32%|███▏ | 120032/371472 [9:33:09<20:21:39, 3.43it/s] 32%|███▏ | 120033/371472 [9:33:09<19:54:38, 3.51it/s] 32%|███▏ | 120034/371472 [9:33:09<19:14:31, 3.63it/s] 32%|███▏ | 120035/371472 [9:33:10<18:38:27, 3.75it/s] 32%|███▏ | 120036/371472 [9:33:10<18:23:46, 3.80it/s] 32%|███▏ | 120037/371472 [9:33:10<18:13:36, 3.83it/s] 32%|███▏ | 120038/371472 [9:33:10<17:44:23, 3.94it/s] 32%|███▏ | 120039/371472 [9:33:11<17:25:54, 4.01it/s] 32%|███▏ | 120040/371472 [9:33:11<17:21:58, 4.02it/s] {'loss': 3.1287, 'learning_rate': 7.094960029304661e-07, 'epoch': 5.17} + 32%|███▏ | 120040/371472 [9:33:11<17:21:58, 4.02it/s] 32%|███▏ | 120041/371472 [9:33:11<18:10:57, 3.84it/s] 32%|███▏ | 120042/371472 [9:33:12<18:23:18, 3.80it/s] 32%|███▏ | 120043/371472 [9:33:12<19:06:54, 3.65it/s] 32%|███▏ | 120044/371472 [9:33:12<19:26:17, 3.59it/s] 32%|███▏ | 120045/371472 [9:33:12<19:39:56, 3.55it/s] 32%|███▏ | 120046/371472 [9:33:13<19:45:37, 3.53it/s] 32%|███▏ | 120047/371472 [9:33:13<19:59:18, 3.49it/s] 32%|███▏ | 120048/371472 [9:33:13<19:52:55, 3.51it/s] 32%|███▏ | 120049/371472 [9:33:14<19:29:01, 3.58it/s] 32%|███▏ | 120050/371472 [9:33:14<18:52:54, 3.70it/s] 32%|███▏ | 120051/371472 [9:33:14<21:24:57, 3.26it/s] 32%|███▏ | 120052/371472 [9:33:14<21:10:35, 3.30it/s] 32%|███▏ | 120053/371472 [9:33:15<21:39:41, 3.22it/s] 32%|███▏ | 120054/371472 [9:33:15<22:13:27, 3.14it/s] 32%|███▏ | 120055/371472 [9:33:15<21:16:17, 3.28it/s] 32%|███▏ | 120056/371472 [9:33:16<20:36:13, 3.39it/s] 32%|███▏ | 120057/371472 [9:33:16<20:40:25, 3.38it/s] 32%|███▏ | 120058/371472 [9:33:16<20:41:56, 3.37it/s] 32%|███▏ | 120059/371472 [9:33:17<19:51:18, 3.52it/s] 32%|███▏ | 120060/371472 [9:33:17<18:54:17, 3.69it/s] {'loss': 3.2619, 'learning_rate': 7.094475209549871e-07, 'epoch': 5.17} + 32%|███▏ | 120060/371472 [9:33:17<18:54:17, 3.69it/s] 32%|███▏ | 120061/371472 [9:33:17<19:40:42, 3.55it/s] 32%|███▏ | 120062/371472 [9:33:17<20:13:26, 3.45it/s] 32%|███▏ | 120063/371472 [9:33:18<21:13:18, 3.29it/s] 32%|███▏ | 120064/371472 [9:33:18<21:32:48, 3.24it/s] 32%|███▏ | 120065/371472 [9:33:18<22:32:52, 3.10it/s] 32%|███▏ | 120066/371472 [9:33:19<21:26:51, 3.26it/s] 32%|███▏ | 120067/371472 [9:33:19<20:05:14, 3.48it/s] 32%|███▏ | 120068/371472 [9:33:19<21:19:32, 3.27it/s] 32%|███▏ | 120069/371472 [9:33:20<20:25:28, 3.42it/s] 32%|███▏ | 120070/371472 [9:33:20<23:47:23, 2.94it/s] 32%|███▏ | 120071/371472 [9:33:20<22:38:32, 3.08it/s] 32%|███▏ | 120072/371472 [9:33:21<21:35:02, 3.24it/s] 32%|███▏ | 120073/371472 [9:33:21<20:53:10, 3.34it/s] 32%|███▏ | 120074/371472 [9:33:21<21:36:39, 3.23it/s] 32%|███▏ | 120075/371472 [9:33:21<20:51:36, 3.35it/s] 32%|███▏ | 120076/371472 [9:33:22<19:44:57, 3.54it/s] 32%|███▏ | 120077/371472 [9:33:22<19:08:40, 3.65it/s] 32%|███▏ | 120078/371472 [9:33:22<19:10:31, 3.64it/s] 32%|███▏ | 120079/371472 [9:33:22<19:54:53, 3.51it/s] 32%|███▏ | 120080/371472 [9:33:23<20:58:08, 3.33it/s] {'loss': 3.351, 'learning_rate': 7.093990389795083e-07, 'epoch': 5.17} + 32%|███▏ | 120080/371472 [9:33:23<20:58:08, 3.33it/s] 32%|███▏ | 120081/371472 [9:33:23<20:47:41, 3.36it/s] 32%|███▏ | 120082/371472 [9:33:23<19:42:23, 3.54it/s] 32%|███▏ | 120083/371472 [9:33:24<19:39:17, 3.55it/s] 32%|███▏ | 120084/371472 [9:33:24<19:40:54, 3.55it/s] 32%|███▏ | 120085/371472 [9:33:24<19:38:01, 3.56it/s] 32%|███▏ | 120086/371472 [9:33:24<19:29:48, 3.58it/s] 32%|███▏ | 120087/371472 [9:33:25<19:03:30, 3.66it/s] 32%|███▏ | 120088/371472 [9:33:25<19:01:07, 3.67it/s] 32%|███▏ | 120089/371472 [9:33:25<20:11:49, 3.46it/s] 32%|███▏ | 120090/371472 [9:33:26<19:33:19, 3.57it/s] 32%|███▏ | 120091/371472 [9:33:26<19:23:40, 3.60it/s] 32%|███▏ | 120092/371472 [9:33:26<18:56:26, 3.69it/s] 32%|███▏ | 120093/371472 [9:33:26<18:53:17, 3.70it/s] 32%|███▏ | 120094/371472 [9:33:27<18:21:00, 3.81it/s] 32%|███▏ | 120095/371472 [9:33:27<20:41:33, 3.37it/s] 32%|███▏ | 120096/371472 [9:33:27<20:50:37, 3.35it/s] 32%|███▏ | 120097/371472 [9:33:28<19:46:10, 3.53it/s] 32%|███▏ | 120098/371472 [9:33:28<19:16:17, 3.62it/s] 32%|███▏ | 120099/371472 [9:33:28<18:38:23, 3.75it/s] 32%|███▏ | 120100/371472 [9:33:28<19:38:50, 3.55it/s] {'loss': 3.2758, 'learning_rate': 7.093505570040294e-07, 'epoch': 5.17} + 32%|███▏ | 120100/371472 [9:33:28<19:38:50, 3.55it/s] 32%|███▏ | 120101/371472 [9:33:29<19:43:11, 3.54it/s] 32%|███▏ | 120102/371472 [9:33:29<20:44:46, 3.37it/s] 32%|███▏ | 120103/371472 [9:33:29<19:55:18, 3.50it/s] 32%|███▏ | 120104/371472 [9:33:30<19:12:58, 3.63it/s] 32%|███▏ | 120105/371472 [9:33:30<18:36:43, 3.75it/s] 32%|███▏ | 120106/371472 [9:33:30<19:04:43, 3.66it/s] 32%|███▏ | 120107/371472 [9:33:30<19:09:08, 3.65it/s] 32%|███▏ | 120108/371472 [9:33:31<20:47:08, 3.36it/s] 32%|███▏ | 120109/371472 [9:33:31<19:53:33, 3.51it/s] 32%|███▏ | 120110/371472 [9:33:31<19:04:01, 3.66it/s] 32%|███▏ | 120111/371472 [9:33:32<20:22:04, 3.43it/s] 32%|███▏ | 120112/371472 [9:33:32<20:02:09, 3.48it/s] 32%|███▏ | 120113/371472 [9:33:32<20:14:30, 3.45it/s] 32%|███▏ | 120114/371472 [9:33:32<21:05:50, 3.31it/s] 32%|███▏ | 120115/371472 [9:33:33<19:51:55, 3.51it/s] 32%|███▏ | 120116/371472 [9:33:33<19:46:32, 3.53it/s] 32%|███▏ | 120117/371472 [9:33:33<20:30:42, 3.40it/s] 32%|███▏ | 120118/371472 [9:33:34<19:32:17, 3.57it/s] 32%|███▏ | 120119/371472 [9:33:34<19:44:10, 3.54it/s] 32%|███▏ | 120120/371472 [9:33:34<19:55:51, 3.50it/s] {'loss': 3.3792, 'learning_rate': 7.093020750285503e-07, 'epoch': 5.17} + 32%|███▏ | 120120/371472 [9:33:34<19:55:51, 3.50it/s] 32%|███▏ | 120121/371472 [9:33:34<19:07:24, 3.65it/s] 32%|███▏ | 120122/371472 [9:33:35<19:11:23, 3.64it/s] 32%|███▏ | 120123/371472 [9:33:35<18:44:42, 3.72it/s] 32%|███▏ | 120124/371472 [9:33:35<17:59:26, 3.88it/s] 32%|███▏ | 120125/371472 [9:33:35<18:17:52, 3.82it/s] 32%|███▏ | 120126/371472 [9:33:36<17:43:02, 3.94it/s] 32%|███▏ | 120127/371472 [9:33:36<18:12:10, 3.84it/s] 32%|███▏ | 120128/371472 [9:33:36<18:24:40, 3.79it/s] 32%|███▏ | 120129/371472 [9:33:36<19:13:26, 3.63it/s] 32%|███▏ | 120130/371472 [9:33:37<19:35:19, 3.56it/s] 32%|███▏ | 120131/371472 [9:33:37<21:45:13, 3.21it/s] 32%|███▏ | 120132/371472 [9:33:37<20:21:57, 3.43it/s] 32%|███▏ | 120133/371472 [9:33:38<19:48:51, 3.52it/s] 32%|███▏ | 120134/371472 [9:33:38<19:06:38, 3.65it/s] 32%|███▏ | 120135/371472 [9:33:38<22:13:18, 3.14it/s] 32%|███▏ | 120136/371472 [9:33:39<21:10:09, 3.30it/s] 32%|███▏ | 120137/371472 [9:33:39<20:42:29, 3.37it/s] 32%|███▏ | 120138/371472 [9:33:39<21:23:37, 3.26it/s] 32%|███▏ | 120139/371472 [9:33:39<19:59:48, 3.49it/s] 32%|███▏ | 120140/371472 [9:33:40<19:55:24, 3.50it/s] {'loss': 3.449, 'learning_rate': 7.092535930530715e-07, 'epoch': 5.17} + 32%|███▏ | 120140/371472 [9:33:40<19:55:24, 3.50it/s] 32%|███▏ | 120141/371472 [9:33:40<19:39:22, 3.55it/s] 32%|███▏ | 120142/371472 [9:33:40<19:27:32, 3.59it/s] 32%|███▏ | 120143/371472 [9:33:41<19:13:27, 3.63it/s] 32%|███▏ | 120144/371472 [9:33:41<18:44:11, 3.73it/s] 32%|███▏ | 120145/371472 [9:33:41<18:38:55, 3.74it/s] 32%|███▏ | 120146/371472 [9:33:41<18:51:58, 3.70it/s] 32%|███▏ | 120147/371472 [9:33:42<18:30:38, 3.77it/s] 32%|███▏ | 120148/371472 [9:33:42<19:50:32, 3.52it/s] 32%|███▏ | 120149/371472 [9:33:42<18:56:34, 3.69it/s] 32%|███▏ | 120150/371472 [9:33:42<20:46:40, 3.36it/s] 32%|███▏ | 120151/371472 [9:33:43<19:29:45, 3.58it/s] 32%|███▏ | 120152/371472 [9:33:43<19:29:15, 3.58it/s] 32%|███▏ | 120153/371472 [9:33:43<19:10:59, 3.64it/s] 32%|███▏ | 120154/371472 [9:33:44<18:46:21, 3.72it/s] 32%|███▏ | 120155/371472 [9:33:44<19:30:04, 3.58it/s] 32%|███▏ | 120156/371472 [9:33:44<19:35:07, 3.56it/s] 32%|███▏ | 120157/371472 [9:33:44<20:06:03, 3.47it/s] 32%|███▏ | 120158/371472 [9:33:45<19:04:00, 3.66it/s] 32%|███▏ | 120159/371472 [9:33:45<18:32:42, 3.76it/s] 32%|███▏ | 120160/371472 [9:33:45<19:49:19, 3.52it/s] {'loss': 3.3339, 'learning_rate': 7.092051110775927e-07, 'epoch': 5.18} + 32%|███▏ | 120160/371472 [9:33:45<19:49:19, 3.52it/s] 32%|███▏ | 120161/371472 [9:33:45<19:11:51, 3.64it/s] 32%|███▏ | 120162/371472 [9:33:46<20:20:01, 3.43it/s] 32%|███▏ | 120163/371472 [9:33:46<19:50:53, 3.52it/s] 32%|███▏ | 120164/371472 [9:33:46<19:31:47, 3.57it/s] 32%|███▏ | 120165/371472 [9:33:47<20:20:04, 3.43it/s] 32%|███▏ | 120166/371472 [9:33:47<19:25:52, 3.59it/s] 32%|███▏ | 120167/371472 [9:33:47<18:59:33, 3.68it/s] 32%|███▏ | 120168/371472 [9:33:47<18:53:13, 3.70it/s] 32%|███▏ | 120169/371472 [9:33:48<18:50:23, 3.71it/s] 32%|███▏ | 120170/371472 [9:33:48<19:06:57, 3.65it/s] 32%|███▏ | 120171/371472 [9:33:48<19:17:13, 3.62it/s] 32%|███▏ | 120172/371472 [9:33:49<19:45:30, 3.53it/s] 32%|███▏ | 120173/371472 [9:33:49<19:50:03, 3.52it/s] 32%|███▏ | 120174/371472 [9:33:49<19:25:10, 3.59it/s] 32%|███▏ | 120175/371472 [9:33:49<19:08:36, 3.65it/s] 32%|███▏ | 120176/371472 [9:33:50<18:58:57, 3.68it/s] 32%|███▏ | 120177/371472 [9:33:50<19:25:22, 3.59it/s] 32%|███▏ | 120178/371472 [9:33:50<19:29:01, 3.58it/s] 32%|███▏ | 120179/371472 [9:33:50<18:55:17, 3.69it/s] 32%|███▏ | 120180/371472 [9:33:51<19:30:37, 3.58it/s] {'loss': 3.4083, 'learning_rate': 7.091566291021138e-07, 'epoch': 5.18} + 32%|███▏ | 120180/371472 [9:33:51<19:30:37, 3.58it/s] 32%|███▏ | 120181/371472 [9:33:51<19:37:33, 3.56it/s] 32%|███▏ | 120182/371472 [9:33:51<20:09:52, 3.46it/s] 32%|███▏ | 120183/371472 [9:33:52<19:37:18, 3.56it/s] 32%|███▏ | 120184/371472 [9:33:52<20:04:58, 3.48it/s] 32%|███▏ | 120185/371472 [9:33:52<19:30:48, 3.58it/s] 32%|███▏ | 120186/371472 [9:33:52<18:35:00, 3.76it/s] 32%|███▏ | 120187/371472 [9:33:53<20:39:22, 3.38it/s] 32%|███▏ | 120188/371472 [9:33:53<20:19:33, 3.43it/s] 32%|███▏ | 120189/371472 [9:33:53<19:34:51, 3.56it/s] 32%|███▏ | 120190/371472 [9:33:54<19:27:27, 3.59it/s] 32%|███▏ | 120191/371472 [9:33:54<20:00:00, 3.49it/s] 32%|███▏ | 120192/371472 [9:33:54<19:00:12, 3.67it/s] 32%|███▏ | 120193/371472 [9:33:54<18:16:49, 3.82it/s] 32%|███▏ | 120194/371472 [9:33:55<18:53:38, 3.69it/s] 32%|███▏ | 120195/371472 [9:33:55<20:31:48, 3.40it/s] 32%|███▏ | 120196/371472 [9:33:55<21:59:47, 3.17it/s] 32%|███▏ | 120197/371472 [9:33:56<20:25:06, 3.42it/s] 32%|███▏ | 120198/371472 [9:33:56<20:35:57, 3.39it/s] 32%|███▏ | 120199/371472 [9:33:56<19:22:05, 3.60it/s] 32%|███▏ | 120200/371472 [9:33:56<18:22:35, 3.80it/s] {'loss': 3.2743, 'learning_rate': 7.091081471266348e-07, 'epoch': 5.18} + 32%|███▏ | 120200/371472 [9:33:56<18:22:35, 3.80it/s] 32%|███▏ | 120201/371472 [9:33:57<19:03:51, 3.66it/s] 32%|███▏ | 120202/371472 [9:33:57<18:43:53, 3.73it/s] 32%|███▏ | 120203/371472 [9:33:57<19:21:34, 3.61it/s] 32%|███▏ | 120204/371472 [9:33:58<19:06:37, 3.65it/s] 32%|███▏ | 120205/371472 [9:33:58<19:10:48, 3.64it/s] 32%|███▏ | 120206/371472 [9:33:58<19:23:35, 3.60it/s] 32%|███▏ | 120207/371472 [9:33:58<19:34:30, 3.57it/s] 32%|███▏ | 120208/371472 [9:33:59<19:32:20, 3.57it/s] 32%|███▏ | 120209/371472 [9:33:59<20:45:33, 3.36it/s] 32%|███▏ | 120210/371472 [9:33:59<19:45:44, 3.53it/s] 32%|███▏ | 120211/371472 [9:34:00<21:18:06, 3.28it/s] 32%|███▏ | 120212/371472 [9:34:00<19:53:31, 3.51it/s] 32%|███▏ | 120213/371472 [9:34:00<21:03:39, 3.31it/s] 32%|███▏ | 120214/371472 [9:34:00<20:35:18, 3.39it/s] 32%|███▏ | 120215/371472 [9:34:01<19:54:29, 3.51it/s] 32%|███▏ | 120216/371472 [9:34:01<20:27:33, 3.41it/s] 32%|███▏ | 120217/371472 [9:34:01<20:34:22, 3.39it/s] 32%|███▏ | 120218/371472 [9:34:02<20:18:27, 3.44it/s] 32%|███▏ | 120219/371472 [9:34:02<21:23:31, 3.26it/s] 32%|███▏ | 120220/371472 [9:34:02<19:59:40, 3.49it/s] {'loss': 3.2275, 'learning_rate': 7.09059665151156e-07, 'epoch': 5.18} + 32%|███▏ | 120220/371472 [9:34:02<19:59:40, 3.49it/s] 32%|███▏ | 120221/371472 [9:34:02<19:12:25, 3.63it/s] 32%|███▏ | 120222/371472 [9:34:03<18:41:40, 3.73it/s] 32%|███▏ | 120223/371472 [9:34:03<18:56:01, 3.69it/s] 32%|███▏ | 120224/371472 [9:34:03<19:03:16, 3.66it/s] 32%|███▏ | 120225/371472 [9:34:04<18:46:30, 3.72it/s] 32%|███▏ | 120226/371472 [9:34:04<18:27:46, 3.78it/s] 32%|███▏ | 120227/371472 [9:34:04<19:01:42, 3.67it/s] 32%|███▏ | 120228/371472 [9:34:04<20:26:54, 3.41it/s] 32%|███▏ | 120229/371472 [9:34:05<19:52:18, 3.51it/s] 32%|███▏ | 120230/371472 [9:34:05<19:34:21, 3.57it/s] 32%|███▏ | 120231/371472 [9:34:05<19:15:25, 3.62it/s] 32%|███▏ | 120232/371472 [9:34:05<19:08:45, 3.65it/s] 32%|███▏ | 120233/371472 [9:34:06<19:24:41, 3.60it/s] 32%|███▏ | 120234/371472 [9:34:06<19:59:53, 3.49it/s] 32%|███▏ | 120235/371472 [9:34:06<19:07:20, 3.65it/s] 32%|███▏ | 120236/371472 [9:34:07<19:23:43, 3.60it/s] 32%|███▏ | 120237/371472 [9:34:07<19:31:22, 3.57it/s] 32%|███▏ | 120238/371472 [9:34:07<20:30:04, 3.40it/s] 32%|███▏ | 120239/371472 [9:34:07<19:49:43, 3.52it/s] 32%|███▏ | 120240/371472 [9:34:08<19:26:17, 3.59it/s] {'loss': 3.2897, 'learning_rate': 7.090111831756771e-07, 'epoch': 5.18} + 32%|███▏ | 120240/371472 [9:34:08<19:26:17, 3.59it/s] 32%|███▏ | 120241/371472 [9:34:08<18:51:24, 3.70it/s] 32%|███▏ | 120242/371472 [9:34:08<19:10:38, 3.64it/s] 32%|███▏ | 120243/371472 [9:34:09<18:49:17, 3.71it/s] 32%|███▏ | 120244/371472 [9:34:09<21:17:24, 3.28it/s] 32%|███▏ | 120245/371472 [9:34:09<20:24:25, 3.42it/s] 32%|███▏ | 120246/371472 [9:34:09<19:49:21, 3.52it/s] 32%|███▏ | 120247/371472 [9:34:10<19:10:39, 3.64it/s] 32%|███▏ | 120248/371472 [9:34:10<20:32:29, 3.40it/s] 32%|███▏ | 120249/371472 [9:34:10<20:55:52, 3.33it/s] 32%|███▏ | 120250/371472 [9:34:11<19:41:54, 3.54it/s] 32%|███▏ | 120251/371472 [9:34:11<19:51:59, 3.51it/s] 32%|███▏ | 120252/371472 [9:34:11<19:01:19, 3.67it/s] 32%|███▏ | 120253/371472 [9:34:11<18:45:29, 3.72it/s] 32%|███▏ | 120254/371472 [9:34:12<19:08:37, 3.65it/s] 32%|███▏ | 120255/371472 [9:34:12<19:59:44, 3.49it/s] 32%|███▏ | 120256/371472 [9:34:12<20:11:03, 3.46it/s] 32%|███▏ | 120257/371472 [9:34:13<20:02:54, 3.48it/s] 32%|███▏ | 120258/371472 [9:34:13<19:15:17, 3.62it/s] 32%|███▏ | 120259/371472 [9:34:13<18:51:44, 3.70it/s] 32%|███▏ | 120260/371472 [9:34:13<18:22:24, 3.80it/s] {'loss': 3.3508, 'learning_rate': 7.089627012001982e-07, 'epoch': 5.18} + 32%|███▏ | 120260/371472 [9:34:13<18:22:24, 3.80it/s] 32%|███▏ | 120261/371472 [9:34:14<18:37:20, 3.75it/s] 32%|███▏ | 120262/371472 [9:34:14<20:11:59, 3.45it/s] 32%|███▏ | 120263/371472 [9:34:14<19:16:19, 3.62it/s] 32%|███▏ | 120264/371472 [9:34:14<18:43:14, 3.73it/s] 32%|███▏ | 120265/371472 [9:34:15<19:37:15, 3.56it/s] 32%|███▏ | 120266/371472 [9:34:15<19:25:33, 3.59it/s] 32%|███▏ | 120267/371472 [9:34:15<20:42:32, 3.37it/s] 32%|███▏ | 120268/371472 [9:34:16<20:38:37, 3.38it/s] 32%|███▏ | 120269/371472 [9:34:16<21:28:06, 3.25it/s] 32%|███▏ | 120270/371472 [9:34:16<20:39:18, 3.38it/s] 32%|███▏ | 120271/371472 [9:34:17<19:54:48, 3.50it/s] 32%|███▏ | 120272/371472 [9:34:17<20:40:47, 3.37it/s] 32%|███▏ | 120273/371472 [9:34:17<20:00:35, 3.49it/s] 32%|███▏ | 120274/371472 [9:34:17<21:31:09, 3.24it/s] 32%|███▏ | 120275/371472 [9:34:18<20:50:58, 3.35it/s] 32%|███▏ | 120276/371472 [9:34:18<21:03:53, 3.31it/s] 32%|███▏ | 120277/371472 [9:34:18<20:33:13, 3.39it/s] 32%|███▏ | 120278/371472 [9:34:19<22:41:12, 3.08it/s] 32%|███▏ | 120279/371472 [9:34:19<21:57:23, 3.18it/s] 32%|███▏ | 120280/371472 [9:34:19<20:34:27, 3.39it/s] {'loss': 3.2463, 'learning_rate': 7.089142192247192e-07, 'epoch': 5.18} + 32%|███▏ | 120280/371472 [9:34:19<20:34:27, 3.39it/s] 32%|███▏ | 120281/371472 [9:34:20<20:26:31, 3.41it/s] 32%|███▏ | 120282/371472 [9:34:20<19:57:07, 3.50it/s] 32%|███▏ | 120283/371472 [9:34:20<19:11:33, 3.64it/s] 32%|███▏ | 120284/371472 [9:34:20<20:49:28, 3.35it/s] 32%|███▏ | 120285/371472 [9:34:21<20:59:16, 3.32it/s] 32%|███▏ | 120286/371472 [9:34:21<19:43:16, 3.54it/s] 32%|███▏ | 120287/371472 [9:34:21<18:55:51, 3.69it/s] 32%|███▏ | 120288/371472 [9:34:22<24:56:36, 2.80it/s] 32%|███▏ | 120289/371472 [9:34:22<23:19:20, 2.99it/s] 32%|███▏ | 120290/371472 [9:34:22<21:58:55, 3.17it/s] 32%|███▏ | 120291/371472 [9:34:23<22:19:36, 3.13it/s] 32%|███▏ | 120292/371472 [9:34:23<22:30:50, 3.10it/s] 32%|███▏ | 120293/371472 [9:34:23<20:52:43, 3.34it/s] 32%|███▏ | 120294/371472 [9:34:23<19:58:14, 3.49it/s] 32%|███▏ | 120295/371472 [9:34:24<19:06:54, 3.65it/s] 32%|███▏ | 120296/371472 [9:34:24<18:35:41, 3.75it/s] 32%|███▏ | 120297/371472 [9:34:24<18:25:34, 3.79it/s] 32%|███▏ | 120298/371472 [9:34:25<19:03:22, 3.66it/s] 32%|███▏ | 120299/371472 [9:34:25<18:52:49, 3.70it/s] 32%|███▏ | 120300/371472 [9:34:25<19:30:04, 3.58it/s] {'loss': 3.1163, 'learning_rate': 7.088657372492404e-07, 'epoch': 5.18} + 32%|███▏ | 120300/371472 [9:34:25<19:30:04, 3.58it/s] 32%|███▏ | 120301/371472 [9:34:25<18:54:54, 3.69it/s] 32%|███▏ | 120302/371472 [9:34:26<18:32:37, 3.76it/s] 32%|███▏ | 120303/371472 [9:34:26<18:22:10, 3.80it/s] 32%|███▏ | 120304/371472 [9:34:26<18:09:32, 3.84it/s] 32%|███▏ | 120305/371472 [9:34:26<19:22:06, 3.60it/s] 32%|███▏ | 120306/371472 [9:34:27<19:06:05, 3.65it/s] 32%|███▏ | 120307/371472 [9:34:27<18:50:46, 3.70it/s] 32%|███▏ | 120308/371472 [9:34:27<19:20:40, 3.61it/s] 32%|███▏ | 120309/371472 [9:34:28<19:14:07, 3.63it/s] 32%|███▏ | 120310/371472 [9:34:28<19:40:16, 3.55it/s] 32%|███▏ | 120311/371472 [9:34:28<19:34:01, 3.57it/s] 32%|███▏ | 120312/371472 [9:34:29<23:04:38, 3.02it/s] 32%|███▏ | 120313/371472 [9:34:29<22:41:38, 3.07it/s] 32%|███▏ | 120314/371472 [9:34:29<21:16:50, 3.28it/s] 32%|███▏ | 120315/371472 [9:34:29<20:22:08, 3.43it/s] 32%|███▏ | 120316/371472 [9:34:30<20:11:05, 3.46it/s] 32%|███▏ | 120317/371472 [9:34:30<20:52:24, 3.34it/s] 32%|███▏ | 120318/371472 [9:34:30<20:09:33, 3.46it/s] 32%|███▏ | 120319/371472 [9:34:31<19:38:34, 3.55it/s] 32%|███▏ | 120320/371472 [9:34:31<21:31:22, 3.24it/s] {'loss': 3.2874, 'learning_rate': 7.088172552737616e-07, 'epoch': 5.18} + 32%|███▏ | 120320/371472 [9:34:31<21:31:22, 3.24it/s] 32%|███▏ | 120321/371472 [9:34:31<20:38:07, 3.38it/s] 32%|███▏ | 120322/371472 [9:34:31<21:26:05, 3.25it/s] 32%|███▏ | 120323/371472 [9:34:32<20:22:21, 3.42it/s] 32%|███▏ | 120324/371472 [9:34:32<19:59:32, 3.49it/s] 32%|███▏ | 120325/371472 [9:34:32<19:40:14, 3.55it/s] 32%|███▏ | 120326/371472 [9:34:33<19:03:44, 3.66it/s] 32%|███▏ | 120327/371472 [9:34:33<18:54:15, 3.69it/s] 32%|███▏ | 120328/371472 [9:34:33<18:36:04, 3.75it/s] 32%|███▏ | 120329/371472 [9:34:33<18:48:59, 3.71it/s] 32%|███▏ | 120330/371472 [9:34:34<18:33:45, 3.76it/s] 32%|███▏ | 120331/371472 [9:34:34<18:36:54, 3.75it/s] 32%|███▏ | 120332/371472 [9:34:34<19:32:25, 3.57it/s] 32%|███▏ | 120333/371472 [9:34:34<19:30:23, 3.58it/s] 32%|███▏ | 120334/371472 [9:34:35<19:24:13, 3.60it/s] 32%|███▏ | 120335/371472 [9:34:35<19:29:57, 3.58it/s] 32%|███▏ | 120336/371472 [9:34:35<19:07:54, 3.65it/s] 32%|███▏ | 120337/371472 [9:34:36<18:43:31, 3.73it/s] 32%|███▏ | 120338/371472 [9:34:36<18:40:56, 3.73it/s] 32%|███▏ | 120339/371472 [9:34:36<18:19:22, 3.81it/s] 32%|███▏ | 120340/371472 [9:34:36<17:53:50, 3.90it/s] {'loss': 3.2875, 'learning_rate': 7.087687732982827e-07, 'epoch': 5.18} + 32%|███▏ | 120340/371472 [9:34:36<17:53:50, 3.90it/s] 32%|███▏ | 120341/371472 [9:34:37<18:55:14, 3.69it/s] 32%|███▏ | 120342/371472 [9:34:37<19:31:10, 3.57it/s] 32%|███▏ | 120343/371472 [9:34:37<19:40:26, 3.55it/s] 32%|███▏ | 120344/371472 [9:34:37<20:34:21, 3.39it/s] 32%|███▏ | 120345/371472 [9:34:38<22:22:09, 3.12it/s] 32%|███▏ | 120346/371472 [9:34:38<21:00:54, 3.32it/s] 32%|███▏ | 120347/371472 [9:34:38<21:50:06, 3.19it/s] 32%|███▏ | 120348/371472 [9:34:39<21:25:07, 3.26it/s] 32%|███▏ | 120349/371472 [9:34:39<20:09:48, 3.46it/s] 32%|███▏ | 120350/371472 [9:34:39<19:41:10, 3.54it/s] 32%|███▏ | 120351/371472 [9:34:40<20:56:00, 3.33it/s] 32%|███▏ | 120352/371472 [9:34:40<20:33:24, 3.39it/s] 32%|███▏ | 120353/371472 [9:34:40<19:36:12, 3.56it/s] 32%|███▏ | 120354/371472 [9:34:40<19:19:59, 3.61it/s] 32%|███▏ | 120355/371472 [9:34:41<19:45:53, 3.53it/s] 32%|███▏ | 120356/371472 [9:34:41<21:50:32, 3.19it/s] 32%|███▏ | 120357/371472 [9:34:41<21:15:16, 3.28it/s] 32%|███▏ | 120358/371472 [9:34:42<20:17:02, 3.44it/s] 32%|███▏ | 120359/371472 [9:34:42<20:19:55, 3.43it/s] 32%|███▏ | 120360/371472 [9:34:42<20:01:02, 3.48it/s] {'loss': 3.3629, 'learning_rate': 7.087202913228037e-07, 'epoch': 5.18} + 32%|███▏ | 120360/371472 [9:34:42<20:01:02, 3.48it/s] 32%|███▏ | 120361/371472 [9:34:43<20:20:10, 3.43it/s] 32%|███▏ | 120362/371472 [9:34:43<20:33:18, 3.39it/s] 32%|███▏ | 120363/371472 [9:34:43<19:42:13, 3.54it/s] 32%|███▏ | 120364/371472 [9:34:43<20:34:16, 3.39it/s] 32%|███▏ | 120365/371472 [9:34:44<20:05:14, 3.47it/s] 32%|███▏ | 120366/371472 [9:34:44<19:11:33, 3.63it/s] 32%|███▏ | 120367/371472 [9:34:44<20:17:21, 3.44it/s] 32%|███▏ | 120368/371472 [9:34:45<19:48:57, 3.52it/s] 32%|███▏ | 120369/371472 [9:34:45<19:30:14, 3.58it/s] 32%|███▏ | 120370/371472 [9:34:45<18:51:05, 3.70it/s] 32%|███▏ | 120371/371472 [9:34:45<18:49:57, 3.70it/s] 32%|███▏ | 120372/371472 [9:34:46<18:28:03, 3.78it/s] 32%|███▏ | 120373/371472 [9:34:46<18:20:46, 3.80it/s] 32%|███▏ | 120374/371472 [9:34:46<18:58:11, 3.68it/s] 32%|███▏ | 120375/371472 [9:34:46<19:50:50, 3.51it/s] 32%|███▏ | 120376/371472 [9:34:47<19:27:37, 3.58it/s] 32%|███▏ | 120377/371472 [9:34:47<19:18:50, 3.61it/s] 32%|███▏ | 120378/371472 [9:34:47<20:15:28, 3.44it/s] 32%|███▏ | 120379/371472 [9:34:48<19:49:49, 3.52it/s] 32%|███▏ | 120380/371472 [9:34:48<19:39:04, 3.55it/s] {'loss': 3.1691, 'learning_rate': 7.086718093473248e-07, 'epoch': 5.18} + 32%|███▏ | 120380/371472 [9:34:48<19:39:04, 3.55it/s] 32%|███▏ | 120381/371472 [9:34:48<20:49:07, 3.35it/s] 32%|███▏ | 120382/371472 [9:34:49<22:23:34, 3.11it/s] 32%|███▏ | 120383/371472 [9:34:49<21:39:02, 3.22it/s] 32%|███▏ | 120384/371472 [9:34:49<21:44:10, 3.21it/s] 32%|███▏ | 120385/371472 [9:34:49<21:25:26, 3.26it/s] 32%|███▏ | 120386/371472 [9:34:50<20:32:43, 3.39it/s] 32%|███▏ | 120387/371472 [9:34:50<19:50:43, 3.51it/s] 32%|███▏ | 120388/371472 [9:34:50<19:24:18, 3.59it/s] 32%|███▏ | 120389/371472 [9:34:50<19:03:07, 3.66it/s] 32%|███▏ | 120390/371472 [9:34:51<19:26:52, 3.59it/s] 32%|███▏ | 120391/371472 [9:34:51<18:44:15, 3.72it/s] 32%|███▏ | 120392/371472 [9:34:51<19:29:34, 3.58it/s] 32%|███▏ | 120393/371472 [9:34:52<19:39:23, 3.55it/s] 32%|███▏ | 120394/371472 [9:34:52<19:27:46, 3.58it/s] 32%|███▏ | 120395/371472 [9:34:52<18:40:31, 3.73it/s] 32%|███▏ | 120396/371472 [9:34:52<18:13:41, 3.83it/s] 32%|███▏ | 120397/371472 [9:34:53<18:34:41, 3.75it/s] 32%|███▏ | 120398/371472 [9:34:53<18:05:51, 3.85it/s] 32%|███▏ | 120399/371472 [9:34:53<19:02:33, 3.66it/s] 32%|███▏ | 120400/371472 [9:34:53<18:26:24, 3.78it/s] {'loss': 3.2311, 'learning_rate': 7.08623327371846e-07, 'epoch': 5.19} + 32%|███▏ | 120400/371472 [9:34:53<18:26:24, 3.78it/s] 32%|███▏ | 120401/371472 [9:34:54<19:03:03, 3.66it/s] 32%|███▏ | 120402/371472 [9:34:54<19:28:12, 3.58it/s] 32%|███▏ | 120403/371472 [9:34:54<19:30:55, 3.57it/s] 32%|███▏ | 120404/371472 [9:34:55<20:08:38, 3.46it/s] 32%|███▏ | 120405/371472 [9:34:55<20:06:29, 3.47it/s] 32%|███▏ | 120406/371472 [9:34:55<19:57:28, 3.49it/s] 32%|███▏ | 120407/371472 [9:34:55<19:47:27, 3.52it/s] 32%|███▏ | 120408/371472 [9:34:56<19:41:10, 3.54it/s] 32%|███▏ | 120409/371472 [9:34:56<20:12:04, 3.45it/s] 32%|███▏ | 120410/371472 [9:34:56<20:04:28, 3.47it/s] 32%|██��▏ | 120411/371472 [9:34:57<19:10:21, 3.64it/s] 32%|███▏ | 120412/371472 [9:34:57<19:01:26, 3.67it/s] 32%|███▏ | 120413/371472 [9:34:57<18:43:26, 3.72it/s] 32%|███▏ | 120414/371472 [9:34:57<18:43:22, 3.72it/s] 32%|███▏ | 120415/371472 [9:34:58<18:21:12, 3.80it/s] 32%|███▏ | 120416/371472 [9:34:58<18:48:41, 3.71it/s] 32%|███▏ | 120417/371472 [9:34:58<19:38:10, 3.55it/s] 32%|███▏ | 120418/371472 [9:34:59<20:08:17, 3.46it/s] 32%|███▏ | 120419/371472 [9:34:59<19:15:42, 3.62it/s] 32%|███▏ | 120420/371472 [9:34:59<18:58:17, 3.68it/s] {'loss': 3.1645, 'learning_rate': 7.085748453963671e-07, 'epoch': 5.19} + 32%|███▏ | 120420/371472 [9:34:59<18:58:17, 3.68it/s] 32%|███▏ | 120421/371472 [9:34:59<18:49:28, 3.70it/s] 32%|███▏ | 120422/371472 [9:35:00<18:50:16, 3.70it/s] 32%|███▏ | 120423/371472 [9:35:00<18:47:54, 3.71it/s] 32%|███▏ | 120424/371472 [9:35:00<18:33:45, 3.76it/s] 32%|███▏ | 120425/371472 [9:35:00<18:28:18, 3.78it/s] 32%|███▏ | 120426/371472 [9:35:01<19:04:06, 3.66it/s] 32%|███▏ | 120427/371472 [9:35:01<18:54:18, 3.69it/s] 32%|███▏ | 120428/371472 [9:35:01<19:32:15, 3.57it/s] 32%|███▏ | 120429/371472 [9:35:01<19:10:16, 3.64it/s] 32%|███▏ | 120430/371472 [9:35:02<18:41:57, 3.73it/s] 32%|███▏ | 120431/371472 [9:35:02<18:30:59, 3.77it/s] 32%|███▏ | 120432/371472 [9:35:02<18:19:29, 3.81it/s] 32%|███▏ | 120433/371472 [9:35:02<18:01:36, 3.87it/s] 32%|███▏ | 120434/371472 [9:35:03<18:01:10, 3.87it/s] 32%|███▏ | 120435/371472 [9:35:03<18:38:32, 3.74it/s] 32%|███▏ | 120436/371472 [9:35:03<18:25:46, 3.78it/s] 32%|███▏ | 120437/371472 [9:35:04<19:14:20, 3.62it/s] 32%|███▏ | 120438/371472 [9:35:04<19:19:21, 3.61it/s] 32%|███▏ | 120439/371472 [9:35:04<19:07:23, 3.65it/s] 32%|███▏ | 120440/371472 [9:35:04<18:28:39, 3.77it/s] {'loss': 3.1069, 'learning_rate': 7.085263634208881e-07, 'epoch': 5.19} + 32%|███▏ | 120440/371472 [9:35:04<18:28:39, 3.77it/s] 32%|███▏ | 120441/371472 [9:35:05<18:06:59, 3.85it/s] 32%|███▏ | 120442/371472 [9:35:05<18:45:27, 3.72it/s] 32%|███▏ | 120443/371472 [9:35:05<22:56:32, 3.04it/s] 32%|███▏ | 120444/371472 [9:35:06<22:18:29, 3.13it/s] 32%|███▏ | 120445/371472 [9:35:06<21:52:51, 3.19it/s] 32%|███▏ | 120446/371472 [9:35:06<20:30:04, 3.40it/s] 32%|███▏ | 120447/371472 [9:35:07<21:21:48, 3.26it/s] 32%|███▏ | 120448/371472 [9:35:07<20:27:35, 3.41it/s] 32%|███▏ | 120449/371472 [9:35:07<19:53:20, 3.51it/s] 32%|███▏ | 120450/371472 [9:35:07<19:39:19, 3.55it/s] 32%|███▏ | 120451/371472 [9:35:08<19:56:32, 3.50it/s] 32%|███▏ | 120452/371472 [9:35:08<19:07:22, 3.65it/s] 32%|███▏ | 120453/371472 [9:35:08<18:59:17, 3.67it/s] 32%|███▏ | 120454/371472 [9:35:08<18:59:52, 3.67it/s] 32%|███▏ | 120455/371472 [9:35:09<18:28:48, 3.77it/s] 32%|███▏ | 120456/371472 [9:35:09<18:16:51, 3.81it/s] 32%|███▏ | 120457/371472 [9:35:09<18:10:01, 3.84it/s] 32%|███▏ | 120458/371472 [9:35:10<18:48:43, 3.71it/s] 32%|███▏ | 120459/371472 [9:35:10<18:29:47, 3.77it/s] 32%|███▏ | 120460/371472 [9:35:10<18:04:24, 3.86it/s] {'loss': 3.3347, 'learning_rate': 7.084778814454093e-07, 'epoch': 5.19} + 32%|███▏ | 120460/371472 [9:35:10<18:04:24, 3.86it/s] 32%|███▏ | 120461/371472 [9:35:10<19:06:57, 3.65it/s] 32%|███▏ | 120462/371472 [9:35:11<19:31:05, 3.57it/s] 32%|███▏ | 120463/371472 [9:35:11<19:43:05, 3.54it/s] 32%|███▏ | 120464/371472 [9:35:11<19:20:05, 3.61it/s] 32%|███▏ | 120465/371472 [9:35:11<18:15:43, 3.82it/s] 32%|███▏ | 120466/371472 [9:35:12<18:22:36, 3.79it/s] 32%|███▏ | 120467/371472 [9:35:12<18:14:39, 3.82it/s] 32%|███▏ | 120468/371472 [9:35:12<18:03:48, 3.86it/s] 32%|███▏ | 120469/371472 [9:35:12<18:32:49, 3.76it/s] 32%|███▏ | 120470/371472 [9:35:13<18:11:50, 3.83it/s] 32%|███▏ | 120471/371472 [9:35:13<19:36:03, 3.56it/s] 32%|███▏ | 120472/371472 [9:35:13<19:24:45, 3.59it/s] 32%|███▏ | 120473/371472 [9:35:14<18:41:23, 3.73it/s] 32%|███▏ | 120474/371472 [9:35:14<18:43:25, 3.72it/s] 32%|███▏ | 120475/371472 [9:35:14<18:30:36, 3.77it/s] 32%|███▏ | 120476/371472 [9:35:14<18:29:18, 3.77it/s] 32%|███▏ | 120477/371472 [9:35:15<20:21:52, 3.42it/s] 32%|███▏ | 120478/371472 [9:35:15<21:04:32, 3.31it/s] 32%|███▏ | 120479/371472 [9:35:15<20:24:17, 3.42it/s] 32%|███▏ | 120480/371472 [9:35:16<19:46:46, 3.52it/s] {'loss': 3.2396, 'learning_rate': 7.084293994699305e-07, 'epoch': 5.19} + 32%|███▏ | 120480/371472 [9:35:16<19:46:46, 3.52it/s] 32%|███▏ | 120481/371472 [9:35:16<19:26:41, 3.59it/s] 32%|███▏ | 120482/371472 [9:35:16<18:27:11, 3.78it/s] 32%|███▏ | 120483/371472 [9:35:16<18:06:49, 3.85it/s] 32%|███▏ | 120484/371472 [9:35:17<18:22:44, 3.79it/s] 32%|███▏ | 120485/371472 [9:35:17<19:05:46, 3.65it/s] 32%|███▏ | 120486/371472 [9:35:17<18:26:32, 3.78it/s] 32%|███▏ | 120487/371472 [9:35:17<19:49:07, 3.52it/s] 32%|███▏ | 120488/371472 [9:35:18<19:58:45, 3.49it/s] 32%|███▏ | 120489/371472 [9:35:18<20:16:26, 3.44it/s] 32%|███▏ | 120490/371472 [9:35:18<19:33:44, 3.56it/s] 32%|███▏ | 120491/371472 [9:35:19<19:28:41, 3.58it/s] 32%|███▏ | 120492/371472 [9:35:19<19:11:38, 3.63it/s] 32%|███▏ | 120493/371472 [9:35:19<18:39:15, 3.74it/s] 32%|███▏ | 120494/371472 [9:35:19<20:17:25, 3.44it/s] 32%|███▏ | 120495/371472 [9:35:20<20:06:36, 3.47it/s] 32%|███▏ | 120496/371472 [9:35:20<20:01:06, 3.48it/s] 32%|███▏ | 120497/371472 [9:35:20<21:09:55, 3.29it/s] 32%|███▏ | 120498/371472 [9:35:21<21:03:18, 3.31it/s] 32%|███▏ | 120499/371472 [9:35:21<20:38:50, 3.38it/s] 32%|███▏ | 120500/371472 [9:35:21<20:18:02, 3.43it/s] {'loss': 3.3069, 'learning_rate': 7.083809174944513e-07, 'epoch': 5.19} + 32%|███▏ | 120500/371472 [9:35:21<20:18:02, 3.43it/s] 32%|███▏ | 120501/371472 [9:35:22<22:16:02, 3.13it/s] 32%|███▏ | 120502/371472 [9:35:22<21:41:56, 3.21it/s] 32%|███▏ | 120503/371472 [9:35:22<20:45:30, 3.36it/s] 32%|███▏ | 120504/371472 [9:35:22<20:31:19, 3.40it/s] 32%|███▏ | 120505/371472 [9:35:23<21:23:17, 3.26it/s] 32%|███▏ | 120506/371472 [9:35:23<19:57:20, 3.49it/s] 32%|███▏ | 120507/371472 [9:35:23<19:47:01, 3.52it/s] 32%|███▏ | 120508/371472 [9:35:24<19:03:00, 3.66it/s] 32%|███▏ | 120509/371472 [9:35:24<18:18:55, 3.81it/s] 32%|███▏ | 120510/371472 [9:35:24<18:33:17, 3.76it/s] 32%|███▏ | 120511/371472 [9:35:24<19:23:25, 3.60it/s] 32%|███▏ | 120512/371472 [9:35:25<20:27:27, 3.41it/s] 32%|███▏ | 120513/371472 [9:35:25<19:42:17, 3.54it/s] 32%|███▏ | 120514/371472 [9:35:25<19:37:08, 3.55it/s] 32%|███▏ | 120515/371472 [9:35:26<19:30:33, 3.57it/s] 32%|███▏ | 120516/371472 [9:35:26<19:19:15, 3.61it/s] 32%|███▏ | 120517/371472 [9:35:26<18:55:26, 3.68it/s] 32%|███▏ | 120518/371472 [9:35:26<19:48:05, 3.52it/s] 32%|███▏ | 120519/371472 [9:35:27<19:30:21, 3.57it/s] 32%|███▏ | 120520/371472 [9:35:27<19:50:34, 3.51it/s] {'loss': 3.3206, 'learning_rate': 7.083324355189725e-07, 'epoch': 5.19} + 32%|███▏ | 120520/371472 [9:35:27<19:50:34, 3.51it/s] 32%|███▏ | 120521/371472 [9:35:27<19:54:11, 3.50it/s] 32%|███▏ | 120522/371472 [9:35:27<19:58:19, 3.49it/s] 32%|███▏ | 120523/371472 [9:35:28<21:18:59, 3.27it/s] 32%|███▏ | 120524/371472 [9:35:28<20:08:51, 3.46it/s] 32%|███▏ | 120525/371472 [9:35:28<19:45:53, 3.53it/s] 32%|███▏ | 120526/371472 [9:35:29<19:03:08, 3.66it/s] 32%|███▏ | 120527/371472 [9:35:29<18:59:18, 3.67it/s] 32%|███▏ | 120528/371472 [9:35:29<19:41:23, 3.54it/s] 32%|███▏ | 120529/371472 [9:35:29<18:54:51, 3.69it/s] 32%|███▏ | 120530/371472 [9:35:30<19:29:45, 3.58it/s] 32%|███▏ | 120531/371472 [9:35:30<19:20:00, 3.61it/s] 32%|███▏ | 120532/371472 [9:35:30<19:31:04, 3.57it/s] 32%|███▏ | 120533/371472 [9:35:31<20:38:47, 3.38it/s] 32%|███▏ | 120534/371472 [9:35:31<19:29:32, 3.58it/s] 32%|███▏ | 120535/371472 [9:35:31<19:32:40, 3.57it/s] 32%|███▏ | 120536/371472 [9:35:31<19:43:58, 3.53it/s] 32%|███▏ | 120537/371472 [9:35:32<20:04:01, 3.47it/s] 32%|███▏ | 120538/371472 [9:35:32<19:23:08, 3.60it/s] 32%|███▏ | 120539/371472 [9:35:32<19:27:18, 3.58it/s] 32%|███▏ | 120540/371472 [9:35:33<19:51:24, 3.51it/s] {'loss': 3.3677, 'learning_rate': 7.082839535434937e-07, 'epoch': 5.19} + 32%|███▏ | 120540/371472 [9:35:33<19:51:24, 3.51it/s] 32%|███▏ | 120541/371472 [9:35:33<19:51:19, 3.51it/s] 32%|███▏ | 120542/371472 [9:35:33<20:45:51, 3.36it/s] 32%|███▏ | 120543/371472 [9:35:34<21:34:49, 3.23it/s] 32%|███▏ | 120544/371472 [9:35:34<20:45:22, 3.36it/s] 32%|███▏ | 120545/371472 [9:35:34<19:38:34, 3.55it/s] 32%|███▏ | 120546/371472 [9:35:34<18:51:36, 3.70it/s] 32%|███▏ | 120547/371472 [9:35:35<19:07:25, 3.64it/s] 32%|███▏ | 120548/371472 [9:35:35<20:39:27, 3.37it/s] 32%|███▏ | 120549/371472 [9:35:35<20:17:18, 3.44it/s] 32%|███▏ | 120550/371472 [9:35:35<20:16:32, 3.44it/s] 32%|███▏ | 120551/371472 [9:35:36<19:53:44, 3.50it/s] 32%|███▏ | 120552/371472 [9:35:36<19:06:20, 3.65it/s] 32%|███▏ | 120553/371472 [9:35:36<18:57:38, 3.68it/s] 32%|███▏ | 120554/371472 [9:35:37<18:31:10, 3.76it/s] 32%|███▏ | 120555/371472 [9:35:37<18:24:52, 3.78it/s] 32%|███▏ | 120556/371472 [9:35:37<20:24:16, 3.42it/s] 32%|███▏ | 120557/371472 [9:35:37<20:07:16, 3.46it/s] 32%|███▏ | 120558/371472 [9:35:38<19:52:37, 3.51it/s] 32%|███▏ | 120559/371472 [9:35:38<19:08:56, 3.64it/s] 32%|███▏ | 120560/371472 [9:35:38<19:44:01, 3.53it/s] {'loss': 3.3377, 'learning_rate': 7.082354715680149e-07, 'epoch': 5.19} + 32%|███▏ | 120560/371472 [9:35:38<19:44:01, 3.53it/s] 32%|███▏ | 120561/371472 [9:35:39<19:32:33, 3.57it/s] 32%|███▏ | 120562/371472 [9:35:39<19:00:00, 3.67it/s] 32%|███▏ | 120563/371472 [9:35:39<19:07:03, 3.65it/s] 32%|███▏ | 120564/371472 [9:35:39<18:47:06, 3.71it/s] 32%|███▏ | 120565/371472 [9:35:40<18:27:37, 3.78it/s] 32%|███▏ | 120566/371472 [9:35:40<18:57:41, 3.68it/s] 32%|███▏ | 120567/371472 [9:35:40<19:09:50, 3.64it/s] 32%|███▏ | 120568/371472 [9:35:40<19:14:59, 3.62it/s] 32%|███▏ | 120569/371472 [9:35:41<19:19:09, 3.61it/s] 32%|███▏ | 120570/371472 [9:35:41<18:39:04, 3.74it/s] 32%|███▏ | 120571/371472 [9:35:41<18:48:27, 3.71it/s] 32%|███▏ | 120572/371472 [9:35:41<18:48:32, 3.71it/s] 32%|███▏ | 120573/371472 [9:35:42<19:28:07, 3.58it/s] 32%|███▏ | 120574/371472 [9:35:42<19:56:29, 3.49it/s] 32%|███▏ | 120575/371472 [9:35:42<20:36:40, 3.38it/s] 32%|███▏ | 120576/371472 [9:35:43<21:38:47, 3.22it/s] 32%|███▏ | 120577/371472 [9:35:43<22:55:50, 3.04it/s] 32%|███▏ | 120578/371472 [9:35:43<22:29:58, 3.10it/s] 32%|███▏ | 120579/371472 [9:35:44<21:27:58, 3.25it/s] 32%|███▏ | 120580/371472 [9:35:44<20:17:53, 3.43it/s] {'loss': 3.3011, 'learning_rate': 7.081869895925359e-07, 'epoch': 5.19} + 32%|███▏ | 120580/371472 [9:35:44<20:17:53, 3.43it/s] 32%|███▏ | 120581/371472 [9:35:44<21:29:38, 3.24it/s] 32%|███▏ | 120582/371472 [9:35:45<20:28:06, 3.40it/s] 32%|███▏ | 120583/371472 [9:35:45<20:19:59, 3.43it/s] 32%|███▏ | 120584/371472 [9:35:45<19:56:06, 3.50it/s] 32%|███▏ | 120585/371472 [9:35:45<20:03:03, 3.48it/s] 32%|███▏ | 120586/371472 [9:35:46<19:58:15, 3.49it/s] 32%|███▏ | 120587/371472 [9:35:46<19:20:43, 3.60it/s] 32%|███▏ | 120588/371472 [9:35:46<18:45:31, 3.72it/s] 32%|███▏ | 120589/371472 [9:35:46<19:15:56, 3.62it/s] 32%|███▏ | 120590/371472 [9:35:47<19:15:53, 3.62it/s] 32%|███▏ | 120591/371472 [9:35:47<18:44:30, 3.72it/s] 32%|███▏ | 120592/371472 [9:35:47<19:50:12, 3.51it/s] 32%|███▏ | 120593/371472 [9:35:48<20:26:38, 3.41it/s] 32%|███▏ | 120594/371472 [9:35:48<20:28:17, 3.40it/s] 32%|███▏ | 120595/371472 [9:35:48<19:19:47, 3.61it/s] 32%|███▏ | 120596/371472 [9:35:48<19:28:57, 3.58it/s] 32%|███▏ | 120597/371472 [9:35:49<20:04:32, 3.47it/s] 32%|███▏ | 120598/371472 [9:35:49<18:57:53, 3.67it/s] 32%|███▏ | 120599/371472 [9:35:49<19:43:11, 3.53it/s] 32%|███▏ | 120600/371472 [9:35:50<18:50:02, 3.70it/s] {'loss': 3.207, 'learning_rate': 7.081385076170571e-07, 'epoch': 5.19} + 32%|███▏ | 120600/371472 [9:35:50<18:50:02, 3.70it/s] 32%|███▏ | 120601/371472 [9:35:50<19:01:08, 3.66it/s] 32%|███▏ | 120602/371472 [9:35:50<18:41:54, 3.73it/s] 32%|███▏ | 120603/371472 [9:35:50<18:28:37, 3.77it/s] 32%|███▏ | 120604/371472 [9:35:51<19:11:26, 3.63it/s] 32%|███▏ | 120605/371472 [9:35:51<19:28:53, 3.58it/s] 32%|███▏ | 120606/371472 [9:35:51<19:27:51, 3.58it/s] 32%|███▏ | 120607/371472 [9:35:51<18:40:04, 3.73it/s] 32%|███▏ | 120608/371472 [9:35:52<19:07:51, 3.64it/s] 32%|███▏ | 120609/371472 [9:35:52<19:47:17, 3.52it/s] 32%|███▏ | 120610/371472 [9:35:52<19:35:57, 3.56it/s] 32%|███▏ | 120611/371472 [9:35:53<19:15:46, 3.62it/s] 32%|███▏ | 120612/371472 [9:35:53<21:42:33, 3.21it/s] 32%|███▏ | 120613/371472 [9:35:53<20:28:46, 3.40it/s] 32%|███▏ | 120614/371472 [9:35:54<20:51:01, 3.34it/s] 32%|███▏ | 120615/371472 [9:35:54<20:44:04, 3.36it/s] 32%|███▏ | 120616/371472 [9:35:54<22:15:52, 3.13it/s] 32%|███▏ | 120617/371472 [9:35:55<22:41:04, 3.07it/s] 32%|███▏ | 120618/371472 [9:35:55<20:44:05, 3.36it/s] 32%|███▏ | 120619/371472 [9:35:55<20:24:43, 3.41it/s] 32%|███▏ | 120620/371472 [9:35:55<19:17:55, 3.61it/s] {'loss': 3.1702, 'learning_rate': 7.080900256415781e-07, 'epoch': 5.2} + 32%|███▏ | 120620/371472 [9:35:55<19:17:55, 3.61it/s] 32%|███▏ | 120621/371472 [9:35:56<20:33:42, 3.39it/s] 32%|███▏ | 120622/371472 [9:35:56<20:36:25, 3.38it/s] 32%|███▏ | 120623/371472 [9:35:56<20:22:49, 3.42it/s] 32%|███▏ | 120624/371472 [9:35:56<19:26:51, 3.58it/s] 32%|███▏ | 120625/371472 [9:35:57<18:54:50, 3.68it/s] 32%|███▏ | 120626/371472 [9:35:57<18:42:11, 3.73it/s] 32%|███▏ | 120627/371472 [9:35:57<18:23:06, 3.79it/s] 32%|███▏ | 120628/371472 [9:35:58<18:55:34, 3.68it/s] 32%|███▏ | 120629/371472 [9:35:58<19:16:02, 3.62it/s] 32%|███▏ | 120630/371472 [9:35:58<20:37:29, 3.38it/s] 32%|███▏ | 120631/371472 [9:35:59<21:14:37, 3.28it/s] 32%|███▏ | 120632/371472 [9:35:59<20:49:56, 3.34it/s] 32%|███▏ | 120633/371472 [9:35:59<20:52:19, 3.34it/s] 32%|███▏ | 120634/371472 [9:35:59<20:39:12, 3.37it/s] 32%|███▏ | 120635/371472 [9:36:00<20:20:09, 3.43it/s] 32%|███▏ | 120636/371472 [9:36:00<19:49:19, 3.52it/s] 32%|███▏ | 120637/371472 [9:36:00<19:05:45, 3.65it/s] 32%|███▏ | 120638/371472 [9:36:00<19:13:47, 3.62it/s] 32%|███▏ | 120639/371472 [9:36:01<19:07:42, 3.64it/s] 32%|███▏ | 120640/371472 [9:36:01<18:30:05, 3.77it/s] {'loss': 3.2638, 'learning_rate': 7.080415436660992e-07, 'epoch': 5.2} + 32%|███▏ | 120640/371472 [9:36:01<18:30:05, 3.77it/s] 32%|███▏ | 120641/371472 [9:36:01<18:57:07, 3.68it/s] 32%|███▏ | 120642/371472 [9:36:02<19:50:23, 3.51it/s] 32%|███▏ | 120643/371472 [9:36:02<19:32:49, 3.56it/s] 32%|███▏ | 120644/371472 [9:36:02<19:43:42, 3.53it/s] 32%|███▏ | 120645/371472 [9:36:02<19:04:28, 3.65it/s] 32%|███▏ | 120646/371472 [9:36:03<18:38:58, 3.74it/s] 32%|███▏ | 120647/371472 [9:36:03<21:03:00, 3.31it/s] 32%|███▏ | 120648/371472 [9:36:03<20:03:33, 3.47it/s] 32%|███▏ | 120649/371472 [9:36:04<19:42:43, 3.53it/s] 32%|███▏ | 120650/371472 [9:36:04<19:16:21, 3.62it/s] 32%|███▏ | 120651/371472 [9:36:04<18:46:32, 3.71it/s] 32%|███▏ | 120652/371472 [9:36:04<18:19:47, 3.80it/s] 32%|███▏ | 120653/371472 [9:36:05<19:14:37, 3.62it/s] 32%|███▏ | 120654/371472 [9:36:05<18:29:24, 3.77it/s] 32%|███▏ | 120655/371472 [9:36:05<18:13:27, 3.82it/s] 32%|███▏ | 120656/371472 [9:36:05<18:39:59, 3.73it/s] 32%|███▏ | 120657/371472 [9:36:06<21:13:16, 3.28it/s] 32%|███▏ | 120658/371472 [9:36:06<21:04:55, 3.30it/s] 32%|███▏ | 120659/371472 [9:36:06<19:51:18, 3.51it/s] 32%|███▏ | 120660/371472 [9:36:07<20:52:07, 3.34it/s] {'loss': 3.2216, 'learning_rate': 7.079930616906202e-07, 'epoch': 5.2} + 32%|███▏ | 120660/371472 [9:36:07<20:52:07, 3.34it/s] 32%|███▏ | 120661/371472 [9:36:07<22:20:34, 3.12it/s] 32%|███▏ | 120662/371472 [9:36:07<22:01:47, 3.16it/s] 32%|███▏ | 120663/371472 [9:36:08<21:47:21, 3.20it/s] 32%|███▏ | 120664/371472 [9:36:08<20:31:54, 3.39it/s] 32%|███▏ | 120665/371472 [9:36:08<20:07:57, 3.46it/s] 32%|███▏ | 120666/371472 [9:36:08<20:30:16, 3.40it/s] 32%|███▏ | 120667/371472 [9:36:09<20:41:50, 3.37it/s] 32%|███▏ | 120668/371472 [9:36:09<19:30:38, 3.57it/s] 32%|███▏ | 120669/371472 [9:36:09<19:08:31, 3.64it/s] 32%|███▏ | 120670/371472 [9:36:10<19:00:40, 3.66it/s] 32%|███▏ | 120671/371472 [9:36:10<20:07:41, 3.46it/s] 32%|███▏ | 120672/371472 [9:36:10<20:00:39, 3.48it/s] 32%|███▏ | 120673/371472 [9:36:10<19:11:11, 3.63it/s] 32%|███▏ | 120674/371472 [9:36:11<18:40:46, 3.73it/s] 32%|███▏ | 120675/371472 [9:36:11<19:02:37, 3.66it/s] 32%|███▏ | 120676/371472 [9:36:11<19:17:08, 3.61it/s] 32%|███▏ | 120677/371472 [9:36:12<19:20:28, 3.60it/s] 32%|███▏ | 120678/371472 [9:36:12<19:34:31, 3.56it/s] 32%|███▏ | 120679/371472 [9:36:12<20:30:47, 3.40it/s] 32%|███▏ | 120680/371472 [9:36:12<19:32:55, 3.56it/s] {'loss': 3.1858, 'learning_rate': 7.079445797151414e-07, 'epoch': 5.2} + 32%|███▏ | 120680/371472 [9:36:12<19:32:55, 3.56it/s] 32%|███▏ | 120681/371472 [9:36:13<21:35:14, 3.23it/s] 32%|███▏ | 120682/371472 [9:36:13<22:23:40, 3.11it/s] 32%|███▏ | 120683/371472 [9:36:13<20:33:31, 3.39it/s] 32%|███▏ | 120684/371472 [9:36:14<19:35:21, 3.56it/s] 32%|███▏ | 120685/371472 [9:36:14<19:11:41, 3.63it/s] 32%|███▏ | 120686/371472 [9:36:14<21:13:11, 3.28it/s] 32%|███▏ | 120687/371472 [9:36:14<20:29:04, 3.40it/s] 32%|███▏ | 120688/371472 [9:36:15<19:29:14, 3.57it/s] 32%|███▏ | 120689/371472 [9:36:15<18:54:39, 3.68it/s] 32%|███▏ | 120690/371472 [9:36:15<18:16:59, 3.81it/s] 32%|███▏ | 120691/371472 [9:36:15<18:10:17, 3.83it/s] 32%|███▏ | 120692/371472 [9:36:16<18:23:09, 3.79it/s] 32%|███▏ | 120693/371472 [9:36:16<19:12:20, 3.63it/s] 32%|███▏ | 120694/371472 [9:36:16<18:13:36, 3.82it/s] 32%|███▏ | 120695/371472 [9:36:17<18:49:56, 3.70it/s] 32%|███▏ | 120696/371472 [9:36:17<19:25:37, 3.59it/s] 32%|███▏ | 120697/371472 [9:36:17<19:48:11, 3.52it/s] 32%|███▏ | 120698/371472 [9:36:17<20:00:18, 3.48it/s] 32%|███▏ | 120699/371472 [9:36:18<19:41:38, 3.54it/s] 32%|███▏ | 120700/371472 [9:36:18<18:54:25, 3.68it/s] {'loss': 3.3262, 'learning_rate': 7.078960977396626e-07, 'epoch': 5.2} + 32%|███▏ | 120700/371472 [9:36:18<18:54:25, 3.68it/s] 32%|███▏ | 120701/371472 [9:36:18<19:06:33, 3.65it/s] 32%|███▏ | 120702/371472 [9:36:19<19:57:12, 3.49it/s] 32%|███▏ | 120703/371472 [9:36:19<19:17:57, 3.61it/s] 32%|███▏ | 120704/371472 [9:36:19<18:55:03, 3.68it/s] 32%|███▏ | 120705/371472 [9:36:19<18:45:15, 3.71it/s] 32%|███▏ | 120706/371472 [9:36:20<18:18:58, 3.80it/s] 32%|███▏ | 120707/371472 [9:36:20<19:07:45, 3.64it/s] 32%|███▏ | 120708/371472 [9:36:20<18:42:28, 3.72it/s] 32%|███▏ | 120709/371472 [9:36:20<19:20:31, 3.60it/s] 32%|███▏ | 120710/371472 [9:36:21<20:00:39, 3.48it/s] 32%|███▏ | 120711/371472 [9:36:21<19:33:26, 3.56it/s] 32%|███▏ | 120712/371472 [9:36:21<19:34:08, 3.56it/s] 32%|███▏ | 120713/371472 [9:36:22<18:34:43, 3.75it/s] 32%|███▏ | 120714/371472 [9:36:22<19:55:22, 3.50it/s] 32%|███▏ | 120715/371472 [9:36:22<19:58:53, 3.49it/s] 32%|███▏ | 120716/371472 [9:36:22<19:37:20, 3.55it/s] 32%|███▏ | 120717/371472 [9:36:23<19:10:31, 3.63it/s] 32%|███▏ | 120718/371472 [9:36:23<19:50:28, 3.51it/s] 32%|███▏ | 120719/371472 [9:36:23<19:17:09, 3.61it/s] 32%|███▏ | 120720/371472 [9:36:24<19:05:51, 3.65it/s] {'loss': 3.1264, 'learning_rate': 7.078476157641837e-07, 'epoch': 5.2} + 32%|███▏ | 120720/371472 [9:36:24<19:05:51, 3.65it/s] 32%|███▏ | 120721/371472 [9:36:24<20:05:49, 3.47it/s] 32%|███▏ | 120722/371472 [9:36:24<19:52:50, 3.50it/s] 32%|███▏ | 120723/371472 [9:36:24<19:40:40, 3.54it/s] 32%|███▏ | 120724/371472 [9:36:25<19:39:36, 3.54it/s] 32%|███▏ | 120725/371472 [9:36:25<20:20:08, 3.43it/s] 32%|███▏ | 120726/371472 [9:36:25<21:23:33, 3.26it/s] 32%|███▏ | 120727/371472 [9:36:26<21:30:10, 3.24it/s] 32%|███▏ | 120728/371472 [9:36:26<20:13:54, 3.44it/s] 33%|███▎ | 120729/371472 [9:36:26<20:44:59, 3.36it/s] 33%|███▎ | 120730/371472 [9:36:27<20:32:12, 3.39it/s] 33%|███▎ | 120731/371472 [9:36:27<20:05:56, 3.47it/s] 33%|███▎ | 120732/371472 [9:36:27<19:09:06, 3.64it/s] 33%|███▎ | 120733/371472 [9:36:27<19:35:00, 3.56it/s] 33%|███▎ | 120734/371472 [9:36:28<19:09:12, 3.64it/s] 33%|███▎ | 120735/371472 [9:36:28<19:22:45, 3.59it/s] 33%|███▎ | 120736/371472 [9:36:28<18:56:06, 3.68it/s] 33%|███▎ | 120737/371472 [9:36:29<21:12:20, 3.28it/s] 33%|███▎ | 120738/371472 [9:36:29<22:20:42, 3.12it/s] 33%|███▎ | 120739/371472 [9:36:29<21:38:20, 3.22it/s] 33%|███▎ | 120740/371472 [9:36:29<20:41:45, 3.37it/s] {'loss': 3.15, 'learning_rate': 7.077991337887047e-07, 'epoch': 5.2} + 33%|███▎ | 120740/371472 [9:36:29<20:41:45, 3.37it/s] 33%|███▎ | 120741/371472 [9:36:30<23:05:39, 3.02it/s] 33%|███▎ | 120742/371472 [9:36:30<21:19:58, 3.26it/s] 33%|███▎ | 120743/371472 [9:36:30<20:18:06, 3.43it/s] 33%|███▎ | 120744/371472 [9:36:31<21:00:03, 3.32it/s] 33%|███▎ | 120745/371472 [9:36:31<21:07:37, 3.30it/s] 33%|███▎ | 120746/371472 [9:36:31<20:41:06, 3.37it/s] 33%|███▎ | 120747/371472 [9:36:32<20:21:06, 3.42it/s] 33%|███▎ | 120748/371472 [9:36:32<20:30:40, 3.40it/s] 33%|███▎ | 120749/371472 [9:36:32<19:29:48, 3.57it/s] 33%|███▎ | 120750/371472 [9:36:32<18:31:07, 3.76it/s] 33%|███▎ | 120751/371472 [9:36:33<18:16:29, 3.81it/s] 33%|███▎ | 120752/371472 [9:36:33<18:08:22, 3.84it/s] 33%|███▎ | 120753/371472 [9:36:33<18:52:31, 3.69it/s] 33%|███▎ | 120754/371472 [9:36:33<18:36:18, 3.74it/s] 33%|███▎ | 120755/371472 [9:36:34<18:10:12, 3.83it/s] 33%|███▎ | 120756/371472 [9:36:34<18:03:22, 3.86it/s] 33%|███▎ | 120757/371472 [9:36:34<19:25:41, 3.58it/s] 33%|███▎ | 120758/371472 [9:36:34<18:50:27, 3.70it/s] 33%|███▎ | 120759/371472 [9:36:35<18:21:23, 3.79it/s] 33%|███▎ | 120760/371472 [9:36:35<17:53:06, 3.89it/s] {'loss': 3.2252, 'learning_rate': 7.077506518132258e-07, 'epoch': 5.2} + 33%|███▎ | 120760/371472 [9:36:35<17:53:06, 3.89it/s] 33%|███▎ | 120761/371472 [9:36:35<17:36:31, 3.95it/s] 33%|███▎ | 120762/371472 [9:36:35<18:08:50, 3.84it/s] 33%|███▎ | 120763/371472 [9:36:36<18:18:18, 3.80it/s] 33%|███▎ | 120764/371472 [9:36:36<18:37:54, 3.74it/s] 33%|███▎ | 120765/371472 [9:36:36<18:30:45, 3.76it/s] 33%|███▎ | 120766/371472 [9:36:37<18:46:48, 3.71it/s] 33%|███▎ | 120767/371472 [9:36:37<17:59:41, 3.87it/s] 33%|███▎ | 120768/371472 [9:36:37<17:51:42, 3.90it/s] 33%|███▎ | 120769/371472 [9:36:37<18:44:57, 3.71it/s] 33%|███▎ | 120770/371472 [9:36:38<19:35:58, 3.55it/s] 33%|███▎ | 120771/371472 [9:36:38<18:50:13, 3.70it/s] 33%|███▎ | 120772/371472 [9:36:38<19:06:17, 3.65it/s] 33%|███▎ | 120773/371472 [9:36:39<20:22:39, 3.42it/s] 33%|███▎ | 120774/371472 [9:36:39<20:04:35, 3.47it/s] 33%|███▎ | 120775/371472 [9:36:39<19:09:01, 3.64it/s] 33%|███▎ | 120776/371472 [9:36:39<18:27:48, 3.77it/s] 33%|███▎ | 120777/371472 [9:36:40<19:01:45, 3.66it/s] 33%|███▎ | 120778/371472 [9:36:40<19:00:57, 3.66it/s] 33%|███▎ | 120779/371472 [9:36:40<18:44:03, 3.72it/s] 33%|███▎ | 120780/371472 [9:36:40<19:02:05, 3.66it/s] {'loss': 3.3022, 'learning_rate': 7.07702169837747e-07, 'epoch': 5.2} + 33%|███▎ | 120780/371472 [9:36:40<19:02:05, 3.66it/s] 33%|███▎ | 120781/371472 [9:36:41<19:22:56, 3.59it/s] 33%|███▎ | 120782/371472 [9:36:41<19:00:07, 3.66it/s] 33%|███▎ | 120783/371472 [9:36:41<18:24:27, 3.78it/s] 33%|███▎ | 120784/371472 [9:36:41<18:40:25, 3.73it/s] 33%|███▎ | 120785/371472 [9:36:42<18:44:06, 3.72it/s] 33%|███▎ | 120786/371472 [9:36:42<18:43:54, 3.72it/s] 33%|███▎ | 120787/371472 [9:36:42<18:40:02, 3.73it/s] 33%|███▎ | 120788/371472 [9:36:43<19:37:47, 3.55it/s] 33%|███▎ | 120789/371472 [9:36:43<20:13:10, 3.44it/s] 33%|███▎ | 120790/371472 [9:36:43<19:42:42, 3.53it/s] 33%|███▎ | 120791/371472 [9:36:43<19:26:05, 3.58it/s] 33%|███▎ | 120792/371472 [9:36:44<21:01:56, 3.31it/s] 33%|███▎ | 120793/371472 [9:36:44<19:56:41, 3.49it/s] 33%|███▎ | 120794/371472 [9:36:44<20:25:34, 3.41it/s] 33%|███▎ | 120795/371472 [9:36:45<19:16:41, 3.61it/s] 33%|███▎ | 120796/371472 [9:36:45<19:23:05, 3.59it/s] 33%|███▎ | 120797/371472 [9:36:45<18:55:44, 3.68it/s] 33%|███▎ | 120798/371472 [9:36:45<19:00:30, 3.66it/s] 33%|███▎ | 120799/371472 [9:36:46<18:46:41, 3.71it/s] 33%|███▎ | 120800/371472 [9:36:46<20:05:14, 3.47it/s] {'loss': 3.1993, 'learning_rate': 7.07653687862268e-07, 'epoch': 5.2} + 33%|███▎ | 120800/371472 [9:36:46<20:05:14, 3.47it/s] 33%|███▎ | 120801/371472 [9:36:46<19:17:02, 3.61it/s] 33%|███▎ | 120802/371472 [9:36:47<19:17:25, 3.61it/s] 33%|███▎ | 120803/371472 [9:36:47<18:57:05, 3.67it/s] 33%|███▎ | 120804/371472 [9:36:47<18:50:31, 3.70it/s] 33%|███▎ | 120805/371472 [9:36:47<19:02:24, 3.66it/s] 33%|███▎ | 120806/371472 [9:36:48<19:07:43, 3.64it/s] 33%|███▎ | 120807/371472 [9:36:48<19:31:25, 3.57it/s] 33%|███▎ | 120808/371472 [9:36:48<19:55:35, 3.49it/s] 33%|███▎ | 120809/371472 [9:36:48<19:31:02, 3.57it/s] 33%|███▎ | 120810/371472 [9:36:49<19:27:26, 3.58it/s] 33%|███▎ | 120811/371472 [9:36:49<20:01:34, 3.48it/s] 33%|███▎ | 120812/371472 [9:36:49<20:43:31, 3.36it/s] 33%|███▎ | 120813/371472 [9:36:50<19:59:55, 3.48it/s] 33%|███▎ | 120814/371472 [9:36:50<19:40:38, 3.54it/s] 33%|███▎ | 120815/371472 [9:36:50<19:35:53, 3.55it/s] 33%|███▎ | 120816/371472 [9:36:51<21:09:43, 3.29it/s] 33%|███▎ | 120817/371472 [9:36:51<20:02:51, 3.47it/s] 33%|███▎ | 120818/371472 [9:36:51<19:08:07, 3.64it/s] 33%|███▎ | 120819/371472 [9:36:51<19:17:13, 3.61it/s] 33%|███▎ | 120820/371472 [9:36:52<19:11:17, 3.63it/s] {'loss': 3.17, 'learning_rate': 7.076052058867891e-07, 'epoch': 5.2} + 33%|███▎ | 120820/371472 [9:36:52<19:11:17, 3.63it/s] 33%|███▎ | 120821/371472 [9:36:52<18:27:18, 3.77it/s] 33%|███▎ | 120822/371472 [9:36:52<18:15:02, 3.81it/s] 33%|███▎ | 120823/371472 [9:36:52<18:43:37, 3.72it/s] 33%|███▎ | 120824/371472 [9:36:53<19:23:16, 3.59it/s] 33%|███▎ | 120825/371472 [9:36:53<19:14:08, 3.62it/s] 33%|███▎ | 120826/371472 [9:36:53<18:33:29, 3.75it/s] 33%|███▎ | 120827/371472 [9:36:53<18:59:06, 3.67it/s] 33%|███▎ | 120828/371472 [9:36:54<18:21:28, 3.79it/s] 33%|███▎ | 120829/371472 [9:36:54<18:47:44, 3.70it/s] 33%|███▎ | 120830/371472 [9:36:54<19:03:09, 3.65it/s] 33%|███▎ | 120831/371472 [9:36:55<19:28:51, 3.57it/s] 33%|███▎ | 120832/371472 [9:36:55<19:01:30, 3.66it/s] 33%|███▎ | 120833/371472 [9:36:55<18:29:28, 3.77it/s] 33%|███▎ | 120834/371472 [9:36:55<19:03:41, 3.65it/s] 33%|███▎ | 120835/371472 [9:36:56<19:44:34, 3.53it/s] 33%|███▎ | 120836/371472 [9:36:56<19:41:29, 3.54it/s] 33%|███▎ | 120837/371472 [9:36:56<19:14:04, 3.62it/s] 33%|███▎ | 120838/371472 [9:36:56<19:27:02, 3.58it/s] 33%|███▎ | 120839/371472 [9:36:57<19:26:23, 3.58it/s] 33%|███▎ | 120840/371472 [9:36:57<20:25:37, 3.41it/s] {'loss': 3.1966, 'learning_rate': 7.075567239113103e-07, 'epoch': 5.2} + 33%|███▎ | 120840/371472 [9:36:57<20:25:37, 3.41it/s] 33%|███▎ | 120841/371472 [9:36:57<20:44:02, 3.36it/s] 33%|███▎ | 120842/371472 [9:36:58<20:27:35, 3.40it/s] 33%|███▎ | 120843/371472 [9:36:58<19:56:17, 3.49it/s] 33%|███▎ | 120844/371472 [9:36:58<21:05:18, 3.30it/s] 33%|███▎ | 120845/371472 [9:36:59<20:20:47, 3.42it/s] 33%|███▎ | 120846/371472 [9:36:59<19:31:05, 3.57it/s] 33%|███▎ | 120847/371472 [9:36:59<19:56:33, 3.49it/s] 33%|███▎ | 120848/371472 [9:36:59<20:32:24, 3.39it/s] 33%|███▎ | 120849/371472 [9:37:00<19:27:08, 3.58it/s] 33%|███▎ | 120850/371472 [9:37:00<18:44:45, 3.71it/s] 33%|███▎ | 120851/371472 [9:37:00<18:19:34, 3.80it/s] 33%|███▎ | 120852/371472 [9:37:00<19:19:20, 3.60it/s] 33%|███▎ | 120853/371472 [9:37:01<19:03:44, 3.65it/s] 33%|███▎ | 120854/371472 [9:37:01<20:09:13, 3.45it/s] 33%|███▎ | 120855/371472 [9:37:01<19:59:02, 3.48it/s] 33%|███▎ | 120856/371472 [9:37:02<18:49:52, 3.70it/s] 33%|███▎ | 120857/371472 [9:37:02<18:45:53, 3.71it/s] 33%|███▎ | 120858/371472 [9:37:02<19:07:42, 3.64it/s] 33%|███▎ | 120859/371472 [9:37:02<18:49:53, 3.70it/s] 33%|███▎ | 120860/371472 [9:37:03<18:36:03, 3.74it/s] {'loss': 3.2244, 'learning_rate': 7.075082419358315e-07, 'epoch': 5.21} + 33%|███▎ | 120860/371472 [9:37:03<18:36:03, 3.74it/s] 33%|███▎ | 120861/371472 [9:37:03<18:07:27, 3.84it/s] 33%|███▎ | 120862/371472 [9:37:03<18:55:17, 3.68it/s] 33%|███▎ | 120863/371472 [9:37:03<19:05:44, 3.65it/s] 33%|███▎ | 120864/371472 [9:37:04<19:19:40, 3.60it/s] 33%|███▎ | 120865/371472 [9:37:04<18:27:19, 3.77it/s] 33%|███▎ | 120866/371472 [9:37:04<19:40:44, 3.54it/s] 33%|███▎ | 120867/371472 [9:37:05<18:38:02, 3.74it/s] 33%|███▎ | 120868/371472 [9:37:05<18:19:54, 3.80it/s] 33%|███▎ | 120869/371472 [9:37:05<18:08:35, 3.84it/s] 33%|███▎ | 120870/371472 [9:37:05<17:41:35, 3.93it/s] 33%|███▎ | 120871/371472 [9:37:06<20:06:52, 3.46it/s] 33%|███▎ | 120872/371472 [9:37:06<20:08:34, 3.46it/s] 33%|███▎ | 120873/371472 [9:37:06<19:27:02, 3.58it/s] 33%|███▎ | 120874/371472 [9:37:07<23:20:53, 2.98it/s] 33%|███▎ | 120875/371472 [9:37:07<21:20:12, 3.26it/s] 33%|███▎ | 120876/371472 [9:37:07<20:36:47, 3.38it/s] 33%|███▎ | 120877/371472 [9:37:07<19:43:30, 3.53it/s] 33%|███▎ | 120878/371472 [9:37:08<19:56:05, 3.49it/s] 33%|███▎ | 120879/371472 [9:37:08<18:51:38, 3.69it/s] 33%|███▎ | 120880/371472 [9:37:08<18:57:47, 3.67it/s] {'loss': 3.2044, 'learning_rate': 7.074597599603524e-07, 'epoch': 5.21} + 33%|███▎ | 120880/371472 [9:37:08<18:57:47, 3.67it/s] 33%|███▎ | 120881/371472 [9:37:09<18:53:55, 3.68it/s] 33%|███▎ | 120882/371472 [9:37:09<19:07:56, 3.64it/s] 33%|███▎ | 120883/371472 [9:37:09<18:34:41, 3.75it/s] 33%|███▎ | 120884/371472 [9:37:09<19:59:40, 3.48it/s] 33%|███▎ | 120885/371472 [9:37:10<19:28:40, 3.57it/s] 33%|███▎ | 120886/371472 [9:37:10<18:54:56, 3.68it/s] 33%|███▎ | 120887/371472 [9:37:10<18:34:16, 3.75it/s] 33%|███▎ | 120888/371472 [9:37:10<18:36:41, 3.74it/s] 33%|███▎ | 120889/371472 [9:37:11<19:46:57, 3.52it/s] 33%|███▎ | 120890/371472 [9:37:11<18:47:09, 3.71it/s] 33%|███▎ | 120891/371472 [9:37:11<18:30:22, 3.76it/s] 33%|███▎ | 120892/371472 [9:37:12<18:59:14, 3.67it/s] 33%|███▎ | 120893/371472 [9:37:12<18:10:47, 3.83it/s] 33%|███▎ | 120894/371472 [9:37:12<18:08:38, 3.84it/s] 33%|███▎ | 120895/371472 [9:37:12<18:59:09, 3.67it/s] 33%|███▎ | 120896/371472 [9:37:13<18:46:28, 3.71it/s] 33%|███▎ | 120897/371472 [9:37:13<20:03:36, 3.47it/s] 33%|███▎ | 120898/371472 [9:37:13<19:25:45, 3.58it/s] 33%|███▎ | 120899/371472 [9:37:14<21:05:07, 3.30it/s] 33%|███▎ | 120900/371472 [9:37:14<19:40:38, 3.54it/s] {'loss': 3.4288, 'learning_rate': 7.074112779848735e-07, 'epoch': 5.21} + 33%|███▎ | 120900/371472 [9:37:14<19:40:38, 3.54it/s] 33%|███▎ | 120901/371472 [9:37:14<19:12:55, 3.62it/s] 33%|███▎ | 120902/371472 [9:37:14<19:58:35, 3.48it/s] 33%|███▎ | 120903/371472 [9:37:15<19:40:20, 3.54it/s] 33%|███▎ | 120904/371472 [9:37:15<19:29:45, 3.57it/s] 33%|███▎ | 120905/371472 [9:37:15<19:15:01, 3.62it/s] 33%|███▎ | 120906/371472 [9:37:15<19:13:53, 3.62it/s] 33%|███▎ | 120907/371472 [9:37:16<19:29:11, 3.57it/s] 33%|███▎ | 120908/371472 [9:37:16<18:40:25, 3.73it/s] 33%|███▎ | 120909/371472 [9:37:16<19:07:06, 3.64it/s] 33%|███▎ | 120910/371472 [9:37:17<19:56:55, 3.49it/s] 33%|███▎ | 120911/371472 [9:37:17<18:46:57, 3.71it/s] 33%|███▎ | 120912/371472 [9:37:17<19:43:14, 3.53it/s] 33%|███▎ | 120913/371472 [9:37:17<19:43:03, 3.53it/s] 33%|███▎ | 120914/371472 [9:37:18<19:22:23, 3.59it/s] 33%|███▎ | 120915/371472 [9:37:18<19:06:05, 3.64it/s] 33%|███▎ | 120916/371472 [9:37:18<19:06:16, 3.64it/s] 33%|███▎ | 120917/371472 [9:37:19<19:56:58, 3.49it/s] 33%|███▎ | 120918/371472 [9:37:19<23:37:16, 2.95it/s] 33%|███▎ | 120919/371472 [9:37:19<22:06:19, 3.15it/s] 33%|███▎ | 120920/371472 [9:37:20<21:38:27, 3.22it/s] {'loss': 3.2562, 'learning_rate': 7.073627960093947e-07, 'epoch': 5.21} + 33%|███▎ | 120920/371472 [9:37:20<21:38:27, 3.22it/s] 33%|███▎ | 120921/371472 [9:37:20<20:27:12, 3.40it/s] 33%|███▎ | 120922/371472 [9:37:20<19:27:48, 3.58it/s] 33%|███▎ | 120923/371472 [9:37:20<20:09:35, 3.45it/s] 33%|███▎ | 120924/371472 [9:37:21<19:22:28, 3.59it/s] 33%|███▎ | 120925/371472 [9:37:21<19:25:01, 3.58it/s] 33%|███▎ | 120926/371472 [9:37:21<19:33:11, 3.56it/s] 33%|███▎ | 120927/371472 [9:37:21<19:07:52, 3.64it/s] 33%|███��� | 120928/371472 [9:37:22<18:44:54, 3.71it/s] 33%|███▎ | 120929/371472 [9:37:22<18:41:04, 3.72it/s] 33%|███▎ | 120930/371472 [9:37:22<18:34:08, 3.75it/s] 33%|███▎ | 120931/371472 [9:37:23<18:43:37, 3.72it/s] 33%|███▎ | 120932/371472 [9:37:23<19:08:02, 3.64it/s] 33%|███▎ | 120933/371472 [9:37:23<18:32:21, 3.75it/s] 33%|███▎ | 120934/371472 [9:37:23<19:55:34, 3.49it/s] 33%|███▎ | 120935/371472 [9:37:24<19:38:10, 3.54it/s] 33%|███▎ | 120936/371472 [9:37:24<20:07:12, 3.46it/s] 33%|███▎ | 120937/371472 [9:37:24<19:29:46, 3.57it/s] 33%|███▎ | 120938/371472 [9:37:24<18:48:42, 3.70it/s] 33%|███▎ | 120939/371472 [9:37:25<18:30:29, 3.76it/s] 33%|███▎ | 120940/371472 [9:37:25<18:47:57, 3.70it/s] {'loss': 3.3652, 'learning_rate': 7.073143140339159e-07, 'epoch': 5.21} + 33%|███▎ | 120940/371472 [9:37:25<18:47:57, 3.70it/s] 33%|███▎ | 120941/371472 [9:37:25<19:22:10, 3.59it/s] 33%|███▎ | 120942/371472 [9:37:26<18:50:50, 3.69it/s] 33%|███▎ | 120943/371472 [9:37:26<18:45:52, 3.71it/s] 33%|███▎ | 120944/371472 [9:37:26<19:36:25, 3.55it/s] 33%|███▎ | 120945/371472 [9:37:26<19:42:04, 3.53it/s] 33%|███▎ | 120946/371472 [9:37:27<19:03:33, 3.65it/s] 33%|███▎ | 120947/371472 [9:37:27<19:19:25, 3.60it/s] 33%|███▎ | 120948/371472 [9:37:27<19:54:39, 3.50it/s] 33%|███▎ | 120949/371472 [9:37:28<20:17:40, 3.43it/s] 33%|███▎ | 120950/371472 [9:37:28<21:09:16, 3.29it/s] 33%|███▎ | 120951/371472 [9:37:28<20:29:32, 3.40it/s] 33%|███▎ | 120952/371472 [9:37:28<21:06:07, 3.30it/s] 33%|███▎ | 120953/371472 [9:37:29<20:28:00, 3.40it/s] 33%|███▎ | 120954/371472 [9:37:29<19:32:54, 3.56it/s] 33%|███▎ | 120955/371472 [9:37:29<19:47:24, 3.52it/s] 33%|███▎ | 120956/371472 [9:37:30<19:26:46, 3.58it/s] 33%|███▎ | 120957/371472 [9:37:30<19:23:23, 3.59it/s] 33%|███▎ | 120958/371472 [9:37:30<19:05:59, 3.64it/s] 33%|███▎ | 120959/371472 [9:37:30<18:29:40, 3.76it/s] 33%|███▎ | 120960/371472 [9:37:31<18:13:00, 3.82it/s] {'loss': 3.2845, 'learning_rate': 7.072658320584369e-07, 'epoch': 5.21} + 33%|███▎ | 120960/371472 [9:37:31<18:13:00, 3.82it/s] 33%|███▎ | 120961/371472 [9:37:31<17:55:01, 3.88it/s] 33%|███▎ | 120962/371472 [9:37:31<18:29:40, 3.76it/s] 33%|███▎ | 120963/371472 [9:37:31<18:37:35, 3.74it/s] 33%|███▎ | 120964/371472 [9:37:32<18:20:30, 3.79it/s] 33%|███▎ | 120965/371472 [9:37:32<20:38:28, 3.37it/s] 33%|███▎ | 120966/371472 [9:37:32<19:36:03, 3.55it/s] 33%|███▎ | 120967/371472 [9:37:33<19:15:21, 3.61it/s] 33%|███▎ | 120968/371472 [9:37:33<19:13:10, 3.62it/s] 33%|███▎ | 120969/371472 [9:37:33<19:19:50, 3.60it/s] 33%|███▎ | 120970/371472 [9:37:33<19:22:17, 3.59it/s] 33%|███▎ | 120971/371472 [9:37:34<20:50:07, 3.34it/s] 33%|███▎ | 120972/371472 [9:37:34<20:00:56, 3.48it/s] 33%|███▎ | 120973/371472 [9:37:34<18:55:51, 3.68it/s] 33%|███▎ | 120974/371472 [9:37:35<19:48:09, 3.51it/s] 33%|███▎ | 120975/371472 [9:37:35<19:18:36, 3.60it/s] 33%|███▎ | 120976/371472 [9:37:35<19:01:57, 3.66it/s] 33%|███▎ | 120977/371472 [9:37:35<19:09:01, 3.63it/s] 33%|███▎ | 120978/371472 [9:37:36<18:48:08, 3.70it/s] 33%|███▎ | 120979/371472 [9:37:36<18:45:17, 3.71it/s] 33%|███▎ | 120980/371472 [9:37:36<18:32:54, 3.75it/s] {'loss': 3.1904, 'learning_rate': 7.07217350082958e-07, 'epoch': 5.21} + 33%|███▎ | 120980/371472 [9:37:36<18:32:54, 3.75it/s] 33%|███▎ | 120981/371472 [9:37:36<19:14:22, 3.62it/s] 33%|███▎ | 120982/371472 [9:37:37<19:13:58, 3.62it/s] 33%|███▎ | 120983/371472 [9:37:37<19:34:19, 3.56it/s] 33%|███▎ | 120984/371472 [9:37:37<18:32:45, 3.75it/s] 33%|███▎ | 120985/371472 [9:37:37<17:52:08, 3.89it/s] 33%|███▎ | 120986/371472 [9:37:38<17:23:53, 4.00it/s] 33%|███▎ | 120987/371472 [9:37:38<17:17:18, 4.02it/s] 33%|███▎ | 120988/371472 [9:37:38<17:29:11, 3.98it/s] 33%|███▎ | 120989/371472 [9:37:38<17:37:01, 3.95it/s] 33%|███▎ | 120990/371472 [9:37:39<17:33:10, 3.96it/s] 33%|███▎ | 120991/371472 [9:37:39<18:11:37, 3.82it/s] 33%|███▎ | 120992/371472 [9:37:39<17:48:05, 3.91it/s] 33%|███▎ | 120993/371472 [9:37:40<17:52:06, 3.89it/s] 33%|███▎ | 120994/371472 [9:37:40<18:25:33, 3.78it/s] 33%|███▎ | 120995/371472 [9:37:40<19:22:19, 3.59it/s] 33%|███▎ | 120996/371472 [9:37:40<19:12:45, 3.62it/s] 33%|███▎ | 120997/371472 [9:37:41<18:50:45, 3.69it/s] 33%|███▎ | 120998/371472 [9:37:41<20:45:23, 3.35it/s] 33%|███▎ | 120999/371472 [9:37:41<20:22:28, 3.41it/s] 33%|███▎ | 121000/371472 [9:37:42<19:20:54, 3.60it/s] {'loss': 3.4221, 'learning_rate': 7.071688681074792e-07, 'epoch': 5.21} + 33%|███▎ | 121000/371472 [9:37:42<19:20:54, 3.60it/s] 33%|███▎ | 121001/371472 [9:37:42<20:24:47, 3.41it/s] 33%|███▎ | 121002/371472 [9:37:42<19:52:01, 3.50it/s] 33%|███▎ | 121003/371472 [9:37:42<19:20:13, 3.60it/s] 33%|███▎ | 121004/371472 [9:37:43<18:50:29, 3.69it/s] 33%|███▎ | 121005/371472 [9:37:43<18:22:18, 3.79it/s] 33%|███▎ | 121006/371472 [9:37:43<18:12:50, 3.82it/s] 33%|███▎ | 121007/371472 [9:37:43<18:05:33, 3.85it/s] 33%|███▎ | 121008/371472 [9:37:44<17:31:42, 3.97it/s] 33%|███▎ | 121009/371472 [9:37:44<17:46:01, 3.92it/s] 33%|███▎ | 121010/371472 [9:37:44<17:24:16, 4.00it/s] 33%|███▎ | 121011/371472 [9:37:44<17:04:28, 4.07it/s] 33%|███▎ | 121012/371472 [9:37:45<18:14:50, 3.81it/s] 33%|███▎ | 121013/371472 [9:37:45<18:12:11, 3.82it/s] 33%|███▎ | 121014/371472 [9:37:45<18:49:01, 3.70it/s] 33%|███▎ | 121015/371472 [9:37:45<18:20:55, 3.79it/s] 33%|███▎ | 121016/371472 [9:37:46<20:32:37, 3.39it/s] 33%|███▎ | 121017/371472 [9:37:46<20:15:32, 3.43it/s] 33%|███▎ | 121018/371472 [9:37:46<18:59:43, 3.66it/s] 33%|███▎ | 121019/371472 [9:37:47<19:13:39, 3.62it/s] 33%|███▎ | 121020/371472 [9:37:47<18:39:10, 3.73it/s] {'loss': 3.1999, 'learning_rate': 7.071203861320002e-07, 'epoch': 5.21} + 33%|███▎ | 121020/371472 [9:37:47<18:39:10, 3.73it/s] 33%|███▎ | 121021/371472 [9:37:47<18:33:44, 3.75it/s] 33%|███▎ | 121022/371472 [9:37:47<19:48:43, 3.51it/s] 33%|███▎ | 121023/371472 [9:37:48<19:51:48, 3.50it/s] 33%|███▎ | 121024/371472 [9:37:48<20:28:16, 3.40it/s] 33%|███▎ | 121025/371472 [9:37:48<20:03:51, 3.47it/s] 33%|███▎ | 121026/371472 [9:37:49<20:05:54, 3.46it/s] 33%|███▎ | 121027/371472 [9:37:49<20:37:06, 3.37it/s] 33%|███▎ | 121028/371472 [9:37:49<21:01:58, 3.31it/s] 33%|███▎ | 121029/371472 [9:37:50<21:42:36, 3.20it/s] 33%|███▎ | 121030/371472 [9:37:50<21:51:53, 3.18it/s] 33%|███▎ | 121031/371472 [9:37:50<20:17:30, 3.43it/s] 33%|███▎ | 121032/371472 [9:37:50<20:14:19, 3.44it/s] 33%|███▎ | 121033/371472 [9:37:51<19:24:38, 3.58it/s] 33%|███▎ | 121034/371472 [9:37:51<19:46:17, 3.52it/s] 33%|███▎ | 121035/371472 [9:37:51<20:54:19, 3.33it/s] 33%|███▎ | 121036/371472 [9:37:52<20:31:46, 3.39it/s] 33%|███▎ | 121037/371472 [9:37:52<19:24:43, 3.58it/s] 33%|███▎ | 121038/371472 [9:37:52<19:14:23, 3.62it/s] 33%|███▎ | 121039/371472 [9:37:52<18:40:19, 3.73it/s] 33%|███▎ | 121040/371472 [9:37:53<18:55:25, 3.68it/s] {'loss': 3.1014, 'learning_rate': 7.070719041565213e-07, 'epoch': 5.21} + 33%|███▎ | 121040/371472 [9:37:53<18:55:25, 3.68it/s] 33%|███▎ | 121041/371472 [9:37:53<18:44:40, 3.71it/s] 33%|███▎ | 121042/371472 [9:37:53<19:00:47, 3.66it/s] 33%|███▎ | 121043/371472 [9:37:53<18:53:18, 3.68it/s] 33%|███▎ | 121044/371472 [9:37:54<19:01:02, 3.66it/s] 33%|███▎ | 121045/371472 [9:37:54<20:27:53, 3.40it/s] 33%|███▎ | 121046/371472 [9:37:54<22:27:58, 3.10it/s] 33%|███▎ | 121047/371472 [9:37:55<22:08:30, 3.14it/s] 33%|███▎ | 121048/371472 [9:37:55<20:41:17, 3.36it/s] 33%|███▎ | 121049/371472 [9:37:55<20:23:09, 3.41it/s] 33%|███▎ | 121050/371472 [9:37:56<19:23:54, 3.59it/s] 33%|███▎ | 121051/371472 [9:37:56<18:55:00, 3.68it/s] 33%|███▎ | 121052/371472 [9:37:56<19:29:04, 3.57it/s] 33%|███▎ | 121053/371472 [9:37:56<18:44:50, 3.71it/s] 33%|███▎ | 121054/371472 [9:37:57<18:52:06, 3.69it/s] 33%|███▎ | 121055/371472 [9:37:57<19:31:25, 3.56it/s] 33%|███▎ | 121056/371472 [9:37:57<19:16:37, 3.61it/s] 33%|███▎ | 121057/371472 [9:37:57<18:27:45, 3.77it/s] 33%|███▎ | 121058/371472 [9:37:58<17:59:53, 3.86it/s] 33%|███▎ | 121059/371472 [9:37:58<19:45:54, 3.52it/s] 33%|███▎ | 121060/371472 [9:37:58<20:39:02, 3.37it/s] {'loss': 3.2871, 'learning_rate': 7.070234221810424e-07, 'epoch': 5.21} + 33%|███▎ | 121060/371472 [9:37:58<20:39:02, 3.37it/s] 33%|███▎ | 121061/371472 [9:37:59<20:12:05, 3.44it/s] 33%|███▎ | 121062/371472 [9:37:59<20:23:50, 3.41it/s] 33%|███▎ | 121063/371472 [9:37:59<21:13:22, 3.28it/s] 33%|███▎ | 121064/371472 [9:38:00<20:35:48, 3.38it/s] 33%|███▎ | 121065/371472 [9:38:00<19:33:33, 3.56it/s] 33%|███▎ | 121066/371472 [9:38:00<18:52:12, 3.69it/s] 33%|███▎ | 121067/371472 [9:38:00<20:18:29, 3.43it/s] 33%|███▎ | 121068/371472 [9:38:01<19:32:42, 3.56it/s] 33%|███▎ | 121069/371472 [9:38:01<19:17:03, 3.61it/s] 33%|███▎ | 121070/371472 [9:38:01<19:10:02, 3.63it/s] 33%|███▎ | 121071/371472 [9:38:01<19:01:45, 3.66it/s] 33%|███▎ | 121072/371472 [9:38:02<19:20:57, 3.59it/s] 33%|███▎ | 121073/371472 [9:38:02<19:17:10, 3.61it/s] 33%|███▎ | 121074/371472 [9:38:02<19:21:06, 3.59it/s] 33%|███▎ | 121075/371472 [9:38:03<20:42:52, 3.36it/s] 33%|███▎ | 121076/371472 [9:38:03<20:42:29, 3.36it/s] 33%|███▎ | 121077/371472 [9:38:03<20:21:39, 3.42it/s] 33%|███▎ | 121078/371472 [9:38:03<19:38:12, 3.54it/s] 33%|███▎ | 121079/371472 [9:38:04<20:16:43, 3.43it/s] 33%|███▎ | 121080/371472 [9:38:04<20:19:01, 3.42it/s] {'loss': 3.2395, 'learning_rate': 7.069749402055636e-07, 'epoch': 5.22} + 33%|███▎ | 121080/371472 [9:38:04<20:19:01, 3.42it/s] 33%|███▎ | 121081/371472 [9:38:04<18:55:55, 3.67it/s] 33%|███▎ | 121082/371472 [9:38:05<20:07:27, 3.46it/s] 33%|███▎ | 121083/371472 [9:38:05<19:55:08, 3.49it/s] 33%|███▎ | 121084/371472 [9:38:05<19:14:37, 3.61it/s] 33%|███▎ | 121085/371472 [9:38:05<18:52:03, 3.69it/s] 33%|███▎ | 121086/371472 [9:38:06<18:25:34, 3.77it/s] 33%|███▎ | 121087/371472 [9:38:06<17:41:56, 3.93it/s] 33%|███▎ | 121088/371472 [9:38:06<19:53:10, 3.50it/s] 33%|███▎ | 121089/371472 [9:38:07<20:11:21, 3.44it/s] 33%|███▎ | 121090/371472 [9:38:07<19:52:12, 3.50it/s] 33%|███▎ | 121091/371472 [9:38:07<18:46:11, 3.71it/s] 33%|███▎ | 121092/371472 [9:38:07<19:48:37, 3.51it/s] 33%|███▎ | 121093/371472 [9:38:08<19:51:55, 3.50it/s] 33%|███▎ | 121094/371472 [9:38:08<19:28:52, 3.57it/s] 33%|███▎ | 121095/371472 [9:38:08<19:12:47, 3.62it/s] 33%|███▎ | 121096/371472 [9:38:09<20:15:32, 3.43it/s] 33%|███▎ | 121097/371472 [9:38:09<20:51:39, 3.33it/s] 33%|███▎ | 121098/371472 [9:38:09<22:02:30, 3.16it/s] 33%|███▎ | 121099/371472 [9:38:10<23:01:10, 3.02it/s] 33%|███▎ | 121100/371472 [9:38:10<22:04:13, 3.15it/s] {'loss': 3.1326, 'learning_rate': 7.069264582300848e-07, 'epoch': 5.22} + 33%|███▎ | 121100/371472 [9:38:10<22:04:13, 3.15it/s] 33%|███▎ | 121101/371472 [9:38:10<21:11:17, 3.28it/s] 33%|███▎ | 121102/371472 [9:38:10<20:36:10, 3.38it/s] 33%|███▎ | 121103/371472 [9:38:11<19:57:01, 3.49it/s] 33%|███▎ | 121104/371472 [9:38:11<18:57:35, 3.67it/s] 33%|███▎ | 121105/371472 [9:38:11<20:08:18, 3.45it/s] 33%|███▎ | 121106/371472 [9:38:12<19:38:15, 3.54it/s] 33%|███▎ | 121107/371472 [9:38:12<20:43:14, 3.36it/s] 33%|███▎ | 121108/371472 [9:38:12<19:48:43, 3.51it/s] 33%|███▎ | 121109/371472 [9:38:12<19:49:02, 3.51it/s] 33%|███▎ | 121110/371472 [9:38:13<21:13:43, 3.28it/s] 33%|███▎ | 121111/371472 [9:38:13<20:04:19, 3.46it/s] 33%|███▎ | 121112/371472 [9:38:13<19:56:58, 3.49it/s] 33%|███▎ | 121113/371472 [9:38:14<19:42:58, 3.53it/s] 33%|███▎ | 121114/371472 [9:38:14<19:43:37, 3.53it/s] 33%|███▎ | 121115/371472 [9:38:14<19:57:13, 3.49it/s] 33%|███▎ | 121116/371472 [9:38:14<19:16:40, 3.61it/s] 33%|███▎ | 121117/371472 [9:38:15<20:34:51, 3.38it/s] 33%|███▎ | 121118/371472 [9:38:15<19:22:07, 3.59it/s] 33%|███▎ | 121119/371472 [9:38:15<19:43:07, 3.53it/s] 33%|███▎ | 121120/371472 [9:38:16<19:09:55, 3.63it/s] {'loss': 3.1206, 'learning_rate': 7.068779762546058e-07, 'epoch': 5.22} + 33%|███▎ | 121120/371472 [9:38:16<19:09:55, 3.63it/s] 33%|███▎ | 121121/371472 [9:38:16<18:39:55, 3.73it/s] 33%|███▎ | 121122/371472 [9:38:16<18:18:58, 3.80it/s] 33%|███▎ | 121123/371472 [9:38:16<17:50:55, 3.90it/s] 33%|███▎ | 121124/371472 [9:38:17<18:04:40, 3.85it/s] 33%|███▎ | 121125/371472 [9:38:17<17:55:42, 3.88it/s] 33%|███▎ | 121126/371472 [9:38:17<18:45:25, 3.71it/s] 33%|███▎ | 121127/371472 [9:38:17<18:21:10, 3.79it/s] 33%|███▎ | 121128/371472 [9:38:18<19:52:39, 3.50it/s] 33%|███▎ | 121129/371472 [9:38:18<18:45:45, 3.71it/s] 33%|███▎ | 121130/371472 [9:38:18<19:01:17, 3.66it/s] 33%|███▎ | 121131/371472 [9:38:18<18:11:59, 3.82it/s] 33%|███▎ | 121132/371472 [9:38:19<17:40:05, 3.94it/s] 33%|███▎ | 121133/371472 [9:38:19<17:33:17, 3.96it/s] 33%|███▎ | 121134/371472 [9:38:19<17:41:40, 3.93it/s] 33%|███▎ | 121135/371472 [9:38:19<17:52:00, 3.89it/s] 33%|███▎ | 121136/371472 [9:38:20<18:49:24, 3.69it/s] 33%|███▎ | 121137/371472 [9:38:20<19:13:35, 3.62it/s] 33%|███▎ | 121138/371472 [9:38:20<19:53:47, 3.49it/s] 33%|███▎ | 121139/371472 [9:38:21<19:22:44, 3.59it/s] 33%|███▎ | 121140/371472 [9:38:21<19:28:10, 3.57it/s] {'loss': 3.368, 'learning_rate': 7.068294942791268e-07, 'epoch': 5.22} + 33%|███▎ | 121140/371472 [9:38:21<19:28:10, 3.57it/s] 33%|███▎ | 121141/371472 [9:38:21<19:35:05, 3.55it/s] 33%|███▎ | 121142/371472 [9:38:21<20:12:18, 3.44it/s] 33%|███▎ | 121143/371472 [9:38:22<19:50:56, 3.50it/s] 33%|███▎ | 121144/371472 [9:38:22<19:15:57, 3.61it/s] 33%|███▎ | 121145/371472 [9:38:22<18:42:07, 3.72it/s] 33%|███▎ | 121146/371472 [9:38:23<18:56:34, 3.67it/s] 33%|███▎ | 121147/371472 [9:38:23<18:49:12, 3.69it/s] 33%|███▎ | 121148/371472 [9:38:23<18:38:21, 3.73it/s] 33%|███▎ | 121149/371472 [9:38:23<18:11:00, 3.82it/s] 33%|███▎ | 121150/371472 [9:38:24<17:59:43, 3.86it/s] 33%|███▎ | 121151/371472 [9:38:24<17:49:57, 3.90it/s] 33%|███▎ | 121152/371472 [9:38:24<17:39:13, 3.94it/s] 33%|███▎ | 121153/371472 [9:38:24<18:06:19, 3.84it/s] 33%|███▎ | 121154/371472 [9:38:25<18:15:37, 3.81it/s] 33%|███▎ | 121155/371472 [9:38:25<18:26:03, 3.77it/s] 33%|███▎ | 121156/371472 [9:38:25<18:54:16, 3.68it/s] 33%|███▎ | 121157/371472 [9:38:25<18:14:10, 3.81it/s] 33%|███▎ | 121158/371472 [9:38:26<17:51:56, 3.89it/s] 33%|███▎ | 121159/371472 [9:38:26<18:01:08, 3.86it/s] 33%|███▎ | 121160/371472 [9:38:26<17:49:44, 3.90it/s] {'loss': 3.2948, 'learning_rate': 7.06781012303648e-07, 'epoch': 5.22} + 33%|███▎ | 121160/371472 [9:38:26<17:49:44, 3.90it/s] 33%|███▎ | 121161/371472 [9:38:26<18:10:13, 3.83it/s] 33%|███▎ | 121162/371472 [9:38:27<18:06:03, 3.84it/s] 33%|███▎ | 121163/371472 [9:38:27<18:49:58, 3.69it/s] 33%|███▎ | 121164/371472 [9:38:27<19:41:41, 3.53it/s] 33%|███▎ | 121165/371472 [9:38:28<19:09:43, 3.63it/s] 33%|███▎ | 121166/371472 [9:38:28<18:40:22, 3.72it/s] 33%|███▎ | 121167/371472 [9:38:28<18:44:50, 3.71it/s] 33%|███▎ | 121168/371472 [9:38:28<19:01:05, 3.66it/s] 33%|███▎ | 121169/371472 [9:38:29<19:57:44, 3.48it/s] 33%|███▎ | 121170/371472 [9:38:29<19:38:04, 3.54it/s] 33%|███▎ | 121171/371472 [9:38:29<20:21:02, 3.42it/s] 33%|███▎ | 121172/371472 [9:38:29<19:11:02, 3.62it/s] 33%|███▎ | 121173/371472 [9:38:30<18:48:58, 3.70it/s] 33%|███▎ | 121174/371472 [9:38:30<18:13:54, 3.81it/s] 33%|███▎ | 121175/371472 [9:38:30<18:22:25, 3.78it/s] 33%|███▎ | 121176/371472 [9:38:31<20:56:14, 3.32it/s] 33%|███▎ | 121177/371472 [9:38:31<20:49:05, 3.34it/s] 33%|███▎ | 121178/371472 [9:38:31<21:51:24, 3.18it/s] 33%|███▎ | 121179/371472 [9:38:32<20:41:48, 3.36it/s] 33%|███▎ | 121180/371472 [9:38:32<20:49:30, 3.34it/s] {'loss': 3.284, 'learning_rate': 7.06732530328169e-07, 'epoch': 5.22} + 33%|███▎ | 121180/371472 [9:38:32<20:49:30, 3.34it/s] 33%|███▎ | 121181/371472 [9:38:32<20:06:24, 3.46it/s] 33%|███▎ | 121182/371472 [9:38:32<20:37:54, 3.37it/s] 33%|███▎ | 121183/371472 [9:38:33<20:11:45, 3.44it/s] 33%|███▎ | 121184/371472 [9:38:33<19:59:10, 3.48it/s] 33%|███▎ | 121185/371472 [9:38:33<20:34:47, 3.38it/s] 33%|███▎ | 121186/371472 [9:38:34<20:42:05, 3.36it/s] 33%|███▎ | 121187/371472 [9:38:34<21:02:16, 3.30it/s] 33%|███▎ | 121188/371472 [9:38:34<19:59:00, 3.48it/s] 33%|███▎ | 121189/371472 [9:38:34<18:57:50, 3.67it/s] 33%|███▎ | 121190/371472 [9:38:35<18:47:46, 3.70it/s] 33%|███▎ | 121191/371472 [9:38:35<18:08:52, 3.83it/s] 33%|███▎ | 121192/371472 [9:38:35<18:23:55, 3.78it/s] 33%|███▎ | 121193/371472 [9:38:35<18:03:50, 3.85it/s] 33%|███▎ | 121194/371472 [9:38:36<17:47:47, 3.91it/s] 33%|███▎ | 121195/371472 [9:38:36<18:12:51, 3.82it/s] 33%|███▎ | 121196/371472 [9:38:36<18:13:19, 3.82it/s] 33%|███▎ | 121197/371472 [9:38:37<18:30:37, 3.76it/s] 33%|███▎ | 121198/371472 [9:38:37<18:15:46, 3.81it/s] 33%|███▎ | 121199/371472 [9:38:37<19:15:45, 3.61it/s] 33%|███▎ | 121200/371472 [9:38:37<18:33:59, 3.74it/s] {'loss': 3.4241, 'learning_rate': 7.066840483526901e-07, 'epoch': 5.22} + 33%|███▎ | 121200/371472 [9:38:37<18:33:59, 3.74it/s] 33%|███▎ | 121201/371472 [9:38:38<18:48:03, 3.70it/s] 33%|███▎ | 121202/371472 [9:38:38<18:17:57, 3.80it/s] 33%|███▎ | 121203/371472 [9:38:38<18:54:13, 3.68it/s] 33%|███▎ | 121204/371472 [9:38:38<18:59:34, 3.66it/s] 33%|███▎ | 121205/371472 [9:38:39<18:29:40, 3.76it/s] 33%|███▎ | 121206/371472 [9:38:39<17:52:53, 3.89it/s] 33%|███▎ | 121207/371472 [9:38:39<17:25:19, 3.99it/s] 33%|███▎ | 121208/371472 [9:38:39<17:53:48, 3.88it/s] 33%|███▎ | 121209/371472 [9:38:40<18:22:15, 3.78it/s] 33%|███▎ | 121210/371472 [9:38:40<19:18:59, 3.60it/s] 33%|███▎ | 121211/371472 [9:38:40<20:00:45, 3.47it/s] 33%|███▎ | 121212/371472 [9:38:41<19:26:36, 3.58it/s] 33%|███▎ | 121213/371472 [9:38:41<18:31:18, 3.75it/s] 33%|███▎ | 121214/371472 [9:38:41<18:16:47, 3.80it/s] 33%|███▎ | 121215/371472 [9:38:41<18:07:48, 3.83it/s] 33%|███▎ | 121216/371472 [9:38:42<20:11:46, 3.44it/s] 33%|███▎ | 121217/371472 [9:38:42<19:33:40, 3.55it/s] 33%|███▎ | 121218/371472 [9:38:42<19:14:29, 3.61it/s] 33%|███▎ | 121219/371472 [9:38:42<18:46:16, 3.70it/s] 33%|███▎ | 121220/371472 [9:38:43<18:23:13, 3.78it/s] {'loss': 3.1637, 'learning_rate': 7.066355663772113e-07, 'epoch': 5.22} + 33%|███▎ | 121220/371472 [9:38:43<18:23:13, 3.78it/s] 33%|███▎ | 121221/371472 [9:38:43<19:01:38, 3.65it/s] 33%|███▎ | 121222/371472 [9:38:43<19:12:27, 3.62it/s] 33%|███▎ | 121223/371472 [9:38:44<18:33:37, 3.75it/s] 33%|███▎ | 121224/371472 [9:38:44<18:29:39, 3.76it/s] 33%|███▎ | 121225/371472 [9:38:44<18:08:47, 3.83it/s] 33%|███▎ | 121226/371472 [9:38:44<19:45:34, 3.52it/s] 33%|███▎ | 121227/371472 [9:38:45<19:11:55, 3.62it/s] 33%|███▎ | 121228/371472 [9:38:45<20:23:28, 3.41it/s] 33%|███▎ | 121229/371472 [9:38:45<21:12:41, 3.28it/s] 33%|███▎ | 121230/371472 [9:38:46<20:37:57, 3.37it/s] 33%|███▎ | 121231/371472 [9:38:46<20:53:20, 3.33it/s] 33%|███▎ | 121232/371472 [9:38:46<21:05:01, 3.30it/s] 33%|███▎ | 121233/371472 [9:38:46<20:46:03, 3.35it/s] 33%|███▎ | 121234/371472 [9:38:47<20:23:33, 3.41it/s] 33%|███▎ | 121235/371472 [9:38:47<19:55:52, 3.49it/s] 33%|███▎ | 121236/371472 [9:38:47<19:55:26, 3.49it/s] 33%|███▎ | 121237/371472 [9:38:48<19:14:13, 3.61it/s] 33%|███▎ | 121238/371472 [9:38:48<19:22:06, 3.59it/s] 33%|███▎ | 121239/371472 [9:38:48<19:46:52, 3.51it/s] 33%|███▎ | 121240/371472 [9:38:48<19:48:05, 3.51it/s] {'loss': 3.2195, 'learning_rate': 7.065870844017325e-07, 'epoch': 5.22} + 33%|███▎ | 121240/371472 [9:38:48<19:48:05, 3.51it/s] 33%|███▎ | 121241/371472 [9:38:49<19:25:47, 3.58it/s] 33%|███▎ | 121242/371472 [9:38:49<18:47:53, 3.70it/s] 33%|███▎ | 121243/371472 [9:38:49<19:05:09, 3.64it/s] 33%|███▎ | 121244/371472 [9:38:50<18:59:57, 3.66it/s] 33%|███▎ | 121245/371472 [9:38:50<20:17:01, 3.43it/s] 33%|███▎ | 121246/371472 [9:38:50<20:01:10, 3.47it/s] 33%|███▎ | 121247/371472 [9:38:50<19:22:30, 3.59it/s] 33%|███▎ | 121248/371472 [9:38:51<18:52:04, 3.68it/s] 33%|███▎ | 121249/371472 [9:38:51<19:10:21, 3.63it/s] 33%|███▎ | 121250/371472 [9:38:51<19:09:26, 3.63it/s] 33%|███▎ | 121251/371472 [9:38:51<19:02:06, 3.65it/s] 33%|███▎ | 121252/371472 [9:38:52<18:39:03, 3.73it/s] 33%|███▎ | 121253/371472 [9:38:52<18:27:34, 3.77it/s] 33%|███▎ | 121254/371472 [9:38:52<18:11:58, 3.82it/s] 33%|███▎ | 121255/371472 [9:38:53<18:28:17, 3.76it/s] 33%|███▎ | 121256/371472 [9:38:53<19:53:53, 3.49it/s] 33%|███▎ | 121257/371472 [9:38:53<18:54:36, 3.68it/s] 33%|███▎ | 121258/371472 [9:38:53<18:07:59, 3.83it/s] 33%|███▎ | 121259/371472 [9:38:54<17:52:39, 3.89it/s] 33%|███▎ | 121260/371472 [9:38:54<18:32:35, 3.75it/s] {'loss': 3.208, 'learning_rate': 7.065386024262534e-07, 'epoch': 5.22} + 33%|███▎ | 121260/371472 [9:38:54<18:32:35, 3.75it/s] 33%|███▎ | 121261/371472 [9:38:54<18:25:54, 3.77it/s] 33%|███▎ | 121262/371472 [9:38:54<17:54:15, 3.88it/s] 33%|███▎ | 121263/371472 [9:38:55<18:35:58, 3.74it/s] 33%|███▎ | 121264/371472 [9:38:55<18:26:40, 3.77it/s] 33%|███▎ | 121265/371472 [9:38:55<18:25:58, 3.77it/s] 33%|███▎ | 121266/371472 [9:38:55<18:38:15, 3.73it/s] 33%|███▎ | 121267/371472 [9:38:56<18:13:43, 3.81it/s] 33%|███▎ | 121268/371472 [9:38:56<18:34:31, 3.74it/s] 33%|███▎ | 121269/371472 [9:38:56<18:19:57, 3.79it/s] 33%|███▎ | 121270/371472 [9:38:57<18:33:10, 3.75it/s] 33%|███▎ | 121271/371472 [9:38:57<19:05:11, 3.64it/s] 33%|███▎ | 121272/371472 [9:38:57<20:00:26, 3.47it/s] 33%|███▎ | 121273/371472 [9:38:57<19:05:07, 3.64it/s] 33%|███▎ | 121274/371472 [9:38:58<19:40:04, 3.53it/s] 33%|███▎ | 121275/371472 [9:38:58<18:56:14, 3.67it/s] 33%|███▎ | 121276/371472 [9:38:58<18:58:23, 3.66it/s] 33%|███▎ | 121277/371472 [9:38:58<19:02:36, 3.65it/s] 33%|███▎ | 121278/371472 [9:38:59<18:49:54, 3.69it/s] 33%|███▎ | 121279/371472 [9:38:59<19:55:33, 3.49it/s] 33%|███▎ | 121280/371472 [9:38:59<19:40:46, 3.53it/s] {'loss': 3.3566, 'learning_rate': 7.064901204507745e-07, 'epoch': 5.22} + 33%|███▎ | 121280/371472 [9:38:59<19:40:46, 3.53it/s] 33%|███▎ | 121281/371472 [9:39:00<19:54:31, 3.49it/s] 33%|███▎ | 121282/371472 [9:39:00<19:08:59, 3.63it/s] 33%|███▎ | 121283/371472 [9:39:00<18:41:06, 3.72it/s] 33%|███▎ | 121284/371472 [9:39:00<19:29:29, 3.57it/s] 33%|███▎ | 121285/371472 [9:39:01<18:56:01, 3.67it/s] 33%|███▎ | 121286/371472 [9:39:01<18:34:50, 3.74it/s] 33%|███▎ | 121287/371472 [9:39:01<18:04:02, 3.85it/s] 33%|███▎ | 121288/371472 [9:39:01<18:16:26, 3.80it/s] 33%|███▎ | 121289/371472 [9:39:02<18:47:04, 3.70it/s] 33%|███▎ | 121290/371472 [9:39:02<19:04:16, 3.64it/s] 33%|███▎ | 121291/371472 [9:39:02<19:28:43, 3.57it/s] 33%|███▎ | 121292/371472 [9:39:03<19:21:34, 3.59it/s] 33%|███▎ | 121293/371472 [9:39:03<18:58:35, 3.66it/s] 33%|███▎ | 121294/371472 [9:39:03<19:18:34, 3.60it/s] 33%|███▎ | 121295/371472 [9:39:03<18:35:49, 3.74it/s] 33%|███▎ | 121296/371472 [9:39:04<18:48:31, 3.69it/s] 33%|███▎ | 121297/371472 [9:39:04<18:22:56, 3.78it/s] 33%|███▎ | 121298/371472 [9:39:04<18:52:14, 3.68it/s] 33%|███▎ | 121299/371472 [9:39:04<18:42:39, 3.71it/s] 33%|███▎ | 121300/371472 [9:39:05<18:16:16, 3.80it/s] {'loss': 3.3725, 'learning_rate': 7.064416384752957e-07, 'epoch': 5.22} + 33%|███▎ | 121300/371472 [9:39:05<18:16:16, 3.80it/s] 33%|███▎ | 121301/371472 [9:39:05<19:16:09, 3.61it/s] 33%|███▎ | 121302/371472 [9:39:05<20:12:52, 3.44it/s] 33%|███▎ | 121303/371472 [9:39:06<21:35:14, 3.22it/s] 33%|███▎ | 121304/371472 [9:39:06<20:52:09, 3.33it/s] 33%|███▎ | 121305/371472 [9:39:06<19:51:14, 3.50it/s] 33%|███▎ | 121306/371472 [9:39:07<19:45:27, 3.52it/s] 33%|███▎ | 121307/371472 [9:39:07<19:44:03, 3.52it/s] 33%|███▎ | 121308/371472 [9:39:07<19:26:23, 3.57it/s] 33%|███▎ | 121309/371472 [9:39:07<20:14:33, 3.43it/s] 33%|███▎ | 121310/371472 [9:39:08<19:19:16, 3.60it/s] 33%|███▎ | 121311/371472 [9:39:08<18:44:00, 3.71it/s] 33%|███▎ | 121312/371472 [9:39:08<18:29:33, 3.76it/s] 33%|███▎ | 121313/371472 [9:39:09<21:00:46, 3.31it/s] 33%|███▎ | 121314/371472 [9:39:09<20:04:37, 3.46it/s] 33%|███▎ | 121315/371472 [9:39:09<19:27:54, 3.57it/s] 33%|███▎ | 121316/371472 [9:39:09<21:08:32, 3.29it/s] 33%|███▎ | 121317/371472 [9:39:10<20:28:35, 3.39it/s] 33%|███▎ | 121318/371472 [9:39:10<20:18:41, 3.42it/s] 33%|███▎ | 121319/371472 [9:39:10<24:01:54, 2.89it/s] 33%|███▎ | 121320/371472 [9:39:11<22:43:41, 3.06it/s] {'loss': 3.3575, 'learning_rate': 7.063931564998169e-07, 'epoch': 5.23} + 33%|███▎ | 121320/371472 [9:39:11<22:43:41, 3.06it/s] 33%|███▎ | 121321/371472 [9:39:11<22:21:59, 3.11it/s] 33%|███▎ | 121322/371472 [9:39:11<21:59:10, 3.16it/s] 33%|███▎ | 121323/371472 [9:39:12<21:18:27, 3.26it/s] 33%|███▎ | 121324/371472 [9:39:12<20:34:36, 3.38it/s] 33%|███▎ | 121325/371472 [9:39:12<22:58:13, 3.02it/s] 33%|███▎ | 121326/371472 [9:39:13<22:08:21, 3.14it/s] 33%|███▎ | 121327/371472 [9:39:13<22:34:13, 3.08it/s] 33%|███▎ | 121328/371472 [9:39:13<21:56:41, 3.17it/s] 33%|███▎ | 121329/371472 [9:39:13<20:49:23, 3.34it/s] 33%|███▎ | 121330/371472 [9:39:14<21:16:37, 3.27it/s] 33%|███▎ | 121331/371472 [9:39:14<20:38:33, 3.37it/s] 33%|███▎ | 121332/371472 [9:39:14<20:02:43, 3.47it/s] 33%|███▎ | 121333/371472 [9:39:15<19:53:30, 3.49it/s] 33%|███▎ | 121334/371472 [9:39:15<22:16:40, 3.12it/s] 33%|███▎ | 121335/371472 [9:39:15<21:47:15, 3.19it/s] 33%|███▎ | 121336/371472 [9:39:16<20:32:14, 3.38it/s] 33%|███▎ | 121337/371472 [9:39:16<21:18:35, 3.26it/s] 33%|███▎ | 121338/371472 [9:39:16<22:13:16, 3.13it/s] 33%|███▎ | 121339/371472 [9:39:17<21:26:05, 3.24it/s] 33%|███▎ | 121340/371472 [9:39:17<20:09:11, 3.45it/s] {'loss': 3.3201, 'learning_rate': 7.063446745243379e-07, 'epoch': 5.23} + 33%|███▎ | 121340/371472 [9:39:17<20:09:11, 3.45it/s] 33%|███▎ | 121341/371472 [9:39:17<19:12:51, 3.62it/s] 33%|███▎ | 121342/371472 [9:39:17<20:18:04, 3.42it/s] 33%|███▎ | 121343/371472 [9:39:18<19:41:44, 3.53it/s] 33%|███▎ | 121344/371472 [9:39:18<19:56:24, 3.48it/s] 33%|███▎ | 121345/371472 [9:39:18<20:47:00, 3.34it/s] 33%|███▎ | 121346/371472 [9:39:19<19:49:53, 3.50it/s] 33%|███▎ | 121347/371472 [9:39:19<19:45:42, 3.52it/s] 33%|███▎ | 121348/371472 [9:39:19<20:27:18, 3.40it/s] 33%|███▎ | 121349/371472 [9:39:19<20:08:56, 3.45it/s] 33%|███▎ | 121350/371472 [9:39:20<19:10:11, 3.62it/s] 33%|███▎ | 121351/371472 [9:39:20<22:07:38, 3.14it/s] 33%|███▎ | 121352/371472 [9:39:20<21:34:54, 3.22it/s] 33%|███▎ | 121353/371472 [9:39:21<21:22:13, 3.25it/s] 33%|███▎ | 121354/371472 [9:39:21<20:50:01, 3.33it/s] 33%|███▎ | 121355/371472 [9:39:21<19:36:57, 3.54it/s] 33%|███▎ | 121356/371472 [9:39:21<19:56:00, 3.49it/s] 33%|███▎ | 121357/371472 [9:39:22<19:24:59, 3.58it/s] 33%|███▎ | 121358/371472 [9:39:22<18:45:23, 3.70it/s] 33%|███▎ | 121359/371472 [9:39:22<18:58:33, 3.66it/s] 33%|███▎ | 121360/371472 [9:39:23<18:49:08, 3.69it/s] {'loss': 3.2648, 'learning_rate': 7.06296192548859e-07, 'epoch': 5.23} + 33%|███▎ | 121360/371472 [9:39:23<18:49:08, 3.69it/s] 33%|███▎ | 121361/371472 [9:39:23<19:18:01, 3.60it/s] 33%|███▎ | 121362/371472 [9:39:23<18:24:29, 3.77it/s] 33%|███▎ | 121363/371472 [9:39:23<18:34:41, 3.74it/s] 33%|███▎ | 121364/371472 [9:39:24<20:13:46, 3.43it/s] 33%|███▎ | 121365/371472 [9:39:24<19:55:24, 3.49it/s] 33%|███▎ | 121366/371472 [9:39:24<19:44:46, 3.52it/s] 33%|███▎ | 121367/371472 [9:39:25<21:35:39, 3.22it/s] 33%|███▎ | 121368/371472 [9:39:25<20:28:23, 3.39it/s] 33%|███▎ | 121369/371472 [9:39:25<19:49:48, 3.50it/s] 33%|███▎ | 121370/371472 [9:39:25<20:04:14, 3.46it/s] 33%|███▎ | 121371/371472 [9:39:26<20:23:20, 3.41it/s] 33%|███▎ | 121372/371472 [9:39:26<20:09:36, 3.45it/s] 33%|███▎ | 121373/371472 [9:39:26<19:36:27, 3.54it/s] 33%|███▎ | 121374/371472 [9:39:27<20:05:32, 3.46it/s] 33%|███▎ | 121375/371472 [9:39:27<19:09:56, 3.62it/s] 33%|███▎ | 121376/371472 [9:39:27<18:34:10, 3.74it/s] 33%|███▎ | 121377/371472 [9:39:27<19:04:23, 3.64it/s] 33%|███▎ | 121378/371472 [9:39:28<18:31:10, 3.75it/s] 33%|███▎ | 121379/371472 [9:39:28<18:47:44, 3.70it/s] 33%|███▎ | 121380/371472 [9:39:28<19:24:03, 3.58it/s] {'loss': 3.186, 'learning_rate': 7.062477105733802e-07, 'epoch': 5.23} + 33%|███▎ | 121380/371472 [9:39:28<19:24:03, 3.58it/s] 33%|███▎ | 121381/371472 [9:39:28<18:59:01, 3.66it/s] 33%|███▎ | 121382/371472 [9:39:29<18:19:12, 3.79it/s] 33%|███▎ | 121383/371472 [9:39:29<18:46:46, 3.70it/s] 33%|███▎ | 121384/371472 [9:39:29<19:09:42, 3.63it/s] 33%|███▎ | 121385/371472 [9:39:30<19:38:46, 3.54it/s] 33%|███▎ | 121386/371472 [9:39:30<19:19:13, 3.60it/s] 33%|███▎ | 121387/371472 [9:39:30<19:28:49, 3.57it/s] 33%|███▎ | 121388/371472 [9:39:30<19:22:23, 3.59it/s] 33%|███▎ | 121389/371472 [9:39:31<20:02:08, 3.47it/s] 33%|███▎ | 121390/371472 [9:39:31<19:41:10, 3.53it/s] 33%|███▎ | 121391/371472 [9:39:31<19:42:20, 3.53it/s] 33%|███▎ | 121392/371472 [9:39:32<19:15:49, 3.61it/s] 33%|███▎ | 121393/371472 [9:39:32<18:37:15, 3.73it/s] 33%|███▎ | 121394/371472 [9:39:32<18:16:29, 3.80it/s] 33%|███▎ | 121395/371472 [9:39:32<18:40:59, 3.72it/s] 33%|███▎ | 121396/371472 [9:39:33<18:43:47, 3.71it/s] 33%|███▎ | 121397/371472 [9:39:33<18:28:51, 3.76it/s] 33%|███▎ | 121398/371472 [9:39:33<18:10:45, 3.82it/s] 33%|███▎ | 121399/371472 [9:39:33<18:32:08, 3.75it/s] 33%|███▎ | 121400/371472 [9:39:34<18:49:16, 3.69it/s] {'loss': 3.3726, 'learning_rate': 7.061992285979013e-07, 'epoch': 5.23} + 33%|███▎ | 121400/371472 [9:39:34<18:49:16, 3.69it/s] 33%|███▎ | 121401/371472 [9:39:34<18:55:44, 3.67it/s] 33%|███▎ | 121402/371472 [9:39:34<18:15:28, 3.80it/s] 33%|███▎ | 121403/371472 [9:39:34<18:01:33, 3.85it/s] 33%|███▎ | 121404/371472 [9:39:35<17:57:41, 3.87it/s] 33%|███▎ | 121405/371472 [9:39:35<17:53:55, 3.88it/s] 33%|███▎ | 121406/371472 [9:39:35<17:23:23, 3.99it/s] 33%|███▎ | 121407/371472 [9:39:35<17:16:23, 4.02it/s] 33%|███▎ | 121408/371472 [9:39:36<17:21:32, 4.00it/s] 33%|███▎ | 121409/371472 [9:39:36<17:02:10, 4.08it/s] 33%|███▎ | 121410/371472 [9:39:36<18:18:51, 3.79it/s] 33%|███▎ | 121411/371472 [9:39:36<18:10:58, 3.82it/s] 33%|███▎ | 121412/371472 [9:39:37<18:57:03, 3.67it/s] 33%|███▎ | 121413/371472 [9:39:37<19:05:56, 3.64it/s] 33%|███▎ | 121414/371472 [9:39:37<20:32:57, 3.38it/s] 33%|███▎ | 121415/371472 [9:39:38<19:25:05, 3.58it/s] 33%|███▎ | 121416/371472 [9:39:38<18:24:00, 3.77it/s] 33%|███▎ | 121417/371472 [9:39:38<18:05:43, 3.84it/s] 33%|███▎ | 121418/371472 [9:39:38<19:25:27, 3.58it/s] 33%|███▎ | 121419/371472 [9:39:39<19:40:54, 3.53it/s] 33%|███▎ | 121420/371472 [9:39:39<20:26:16, 3.40it/s] {'loss': 3.2674, 'learning_rate': 7.061507466224223e-07, 'epoch': 5.23} + 33%|███▎ | 121420/371472 [9:39:39<20:26:16, 3.40it/s] 33%|███▎ | 121421/371472 [9:39:39<19:45:02, 3.52it/s] 33%|███▎ | 121422/371472 [9:39:40<18:49:42, 3.69it/s] 33%|███▎ | 121423/371472 [9:39:40<19:43:04, 3.52it/s] 33%|███▎ | 121424/371472 [9:39:40<20:12:58, 3.44it/s] 33%|███▎ | 121425/371472 [9:39:40<20:13:10, 3.44it/s] 33%|███▎ | 121426/371472 [9:39:41<20:06:34, 3.45it/s] 33%|███▎ | 121427/371472 [9:39:41<20:46:06, 3.34it/s] 33%|███▎ | 121428/371472 [9:39:41<21:22:10, 3.25it/s] 33%|███▎ | 121429/371472 [9:39:42<22:08:21, 3.14it/s] 33%|███▎ | 121430/371472 [9:39:42<22:39:02, 3.07it/s] 33%|███▎ | 121431/371472 [9:39:42<21:31:09, 3.23it/s] 33%|███▎ | 121432/371472 [9:39:43<20:55:18, 3.32it/s] 33%|███▎ | 121433/371472 [9:39:43<21:30:40, 3.23it/s] 33%|███▎ | 121434/371472 [9:39:43<21:26:02, 3.24it/s] 33%|███▎ | 121435/371472 [9:39:44<20:25:43, 3.40it/s] 33%|███▎ | 121436/371472 [9:39:44<19:34:35, 3.55it/s] 33%|███▎ | 121437/371472 [9:39:44<21:11:06, 3.28it/s] 33%|███▎ | 121438/371472 [9:39:44<19:52:33, 3.49it/s] 33%|███▎ | 121439/371472 [9:39:45<19:19:55, 3.59it/s] 33%|███▎ | 121440/371472 [9:39:45<18:34:02, 3.74it/s] {'loss': 3.3961, 'learning_rate': 7.061022646469434e-07, 'epoch': 5.23} + 33%|███▎ | 121440/371472 [9:39:45<18:34:02, 3.74it/s] 33%|███▎ | 121441/371472 [9:39:45<18:48:49, 3.69it/s] 33%|███▎ | 121442/371472 [9:39:46<20:54:20, 3.32it/s] 33%|███▎ | 121443/371472 [9:39:46<20:24:00, 3.40it/s] 33%|███▎ | 121444/371472 [9:39:46<19:58:46, 3.48it/s] 33%|���██▎ | 121445/371472 [9:39:46<19:49:39, 3.50it/s] 33%|███▎ | 121446/371472 [9:39:47<19:03:47, 3.64it/s] 33%|███▎ | 121447/371472 [9:39:47<20:00:28, 3.47it/s] 33%|███▎ | 121448/371472 [9:39:47<20:00:54, 3.47it/s] 33%|███▎ | 121449/371472 [9:39:47<19:29:41, 3.56it/s] 33%|███▎ | 121450/371472 [9:39:48<19:08:15, 3.63it/s] 33%|███▎ | 121451/371472 [9:39:48<18:49:13, 3.69it/s] 33%|███▎ | 121452/371472 [9:39:48<19:15:07, 3.61it/s] 33%|███▎ | 121453/371472 [9:39:49<18:49:10, 3.69it/s] 33%|███▎ | 121454/371472 [9:39:49<18:54:01, 3.67it/s] 33%|███▎ | 121455/371472 [9:39:49<19:40:37, 3.53it/s] 33%|███▎ | 121456/371472 [9:39:49<19:49:39, 3.50it/s] 33%|███▎ | 121457/371472 [9:39:50<19:18:00, 3.60it/s] 33%|███▎ | 121458/371472 [9:39:50<18:38:46, 3.72it/s] 33%|███▎ | 121459/371472 [9:39:50<18:20:14, 3.79it/s] 33%|███▎ | 121460/371472 [9:39:51<20:26:16, 3.40it/s] {'loss': 3.0819, 'learning_rate': 7.060537826714646e-07, 'epoch': 5.23} + 33%|███▎ | 121460/371472 [9:39:51<20:26:16, 3.40it/s] 33%|███▎ | 121461/371472 [9:39:51<20:23:33, 3.41it/s] 33%|███▎ | 121462/371472 [9:39:51<19:13:05, 3.61it/s] 33%|███▎ | 121463/371472 [9:39:51<18:35:45, 3.73it/s] 33%|███▎ | 121464/371472 [9:39:52<19:01:27, 3.65it/s] 33%|███▎ | 121465/371472 [9:39:52<19:01:07, 3.65it/s] 33%|███▎ | 121466/371472 [9:39:52<18:25:49, 3.77it/s] 33%|███▎ | 121467/371472 [9:39:52<20:38:47, 3.36it/s] 33%|███▎ | 121468/371472 [9:39:53<20:10:30, 3.44it/s] 33%|███▎ | 121469/371472 [9:39:53<21:59:39, 3.16it/s] 33%|███▎ | 121470/371472 [9:39:54<24:13:45, 2.87it/s] 33%|███▎ | 121471/371472 [9:39:54<23:11:21, 2.99it/s] 33%|███▎ | 121472/371472 [9:39:54<22:50:17, 3.04it/s] 33%|███▎ | 121473/371472 [9:39:54<21:26:31, 3.24it/s] 33%|███▎ | 121474/371472 [9:39:55<20:16:21, 3.43it/s] 33%|███▎ | 121475/371472 [9:39:55<20:20:21, 3.41it/s] 33%|███▎ | 121476/371472 [9:39:55<20:22:12, 3.41it/s] 33%|███▎ | 121477/371472 [9:39:56<19:41:15, 3.53it/s] 33%|███▎ | 121478/371472 [9:39:56<20:42:41, 3.35it/s] 33%|███▎ | 121479/371472 [9:39:56<19:53:53, 3.49it/s] 33%|███▎ | 121480/371472 [9:39:56<19:14:04, 3.61it/s] {'loss': 3.3016, 'learning_rate': 7.060053006959857e-07, 'epoch': 5.23} + 33%|███▎ | 121480/371472 [9:39:56<19:14:04, 3.61it/s] 33%|███▎ | 121481/371472 [9:39:57<19:45:27, 3.51it/s] 33%|███▎ | 121482/371472 [9:39:57<19:50:45, 3.50it/s] 33%|███▎ | 121483/371472 [9:39:57<19:20:47, 3.59it/s] 33%|███▎ | 121484/371472 [9:39:58<19:33:05, 3.55it/s] 33%|███▎ | 121485/371472 [9:39:58<21:20:07, 3.25it/s] 33%|███▎ | 121486/371472 [9:39:58<20:45:59, 3.34it/s] 33%|███▎ | 121487/371472 [9:39:59<21:48:38, 3.18it/s] 33%|███▎ | 121488/371472 [9:39:59<20:44:23, 3.35it/s] 33%|███▎ | 121489/371472 [9:39:59<21:48:56, 3.18it/s] 33%|███▎ | 121490/371472 [9:39:59<21:29:21, 3.23it/s] 33%|███▎ | 121491/371472 [9:40:00<20:12:54, 3.43it/s] 33%|███▎ | 121492/371472 [9:40:00<19:27:41, 3.57it/s] 33%|███▎ | 121493/371472 [9:40:00<20:18:00, 3.42it/s] 33%|███▎ | 121494/371472 [9:40:01<19:51:17, 3.50it/s] 33%|███▎ | 121495/371472 [9:40:01<19:44:05, 3.52it/s] 33%|███▎ | 121496/371472 [9:40:01<19:13:24, 3.61it/s] 33%|███▎ | 121497/371472 [9:40:01<19:01:01, 3.65it/s] 33%|███▎ | 121498/371472 [9:40:02<19:26:28, 3.57it/s] 33%|███▎ | 121499/371472 [9:40:02<18:48:12, 3.69it/s] 33%|███▎ | 121500/371472 [9:40:02<18:45:30, 3.70it/s] {'loss': 3.2763, 'learning_rate': 7.059568187205068e-07, 'epoch': 5.23} + 33%|███▎ | 121500/371472 [9:40:02<18:45:30, 3.70it/s] 33%|███▎ | 121501/371472 [9:40:03<20:25:11, 3.40it/s] 33%|███▎ | 121502/371472 [9:40:03<20:34:32, 3.37it/s] 33%|███▎ | 121503/371472 [9:40:03<20:35:26, 3.37it/s] 33%|███▎ | 121504/371472 [9:40:03<19:47:42, 3.51it/s] 33%|███▎ | 121505/371472 [9:40:04<19:13:34, 3.61it/s] 33%|███▎ | 121506/371472 [9:40:04<18:58:24, 3.66it/s] 33%|███▎ | 121507/371472 [9:40:04<19:36:06, 3.54it/s] 33%|███▎ | 121508/371472 [9:40:04<19:28:12, 3.57it/s] 33%|███▎ | 121509/371472 [9:40:05<18:51:44, 3.68it/s] 33%|███▎ | 121510/371472 [9:40:05<19:54:58, 3.49it/s] 33%|███▎ | 121511/371472 [9:40:05<20:10:18, 3.44it/s] 33%|███▎ | 121512/371472 [9:40:06<19:44:12, 3.52it/s] 33%|███▎ | 121513/371472 [9:40:06<19:37:51, 3.54it/s] 33%|███▎ | 121514/371472 [9:40:06<18:55:38, 3.67it/s] 33%|███▎ | 121515/371472 [9:40:06<18:22:33, 3.78it/s] 33%|███▎ | 121516/371472 [9:40:07<18:41:25, 3.71it/s] 33%|███▎ | 121517/371472 [9:40:07<21:08:29, 3.28it/s] 33%|███▎ | 121518/371472 [9:40:07<19:52:31, 3.49it/s] 33%|███▎ | 121519/371472 [9:40:08<18:54:10, 3.67it/s] 33%|███▎ | 121520/371472 [9:40:08<18:36:18, 3.73it/s] {'loss': 3.2682, 'learning_rate': 7.059083367450278e-07, 'epoch': 5.23} + 33%|███▎ | 121520/371472 [9:40:08<18:36:18, 3.73it/s] 33%|███▎ | 121521/371472 [9:40:08<18:00:19, 3.86it/s] 33%|███▎ | 121522/371472 [9:40:08<18:52:54, 3.68it/s] 33%|███▎ | 121523/371472 [9:40:09<18:43:53, 3.71it/s] 33%|███▎ | 121524/371472 [9:40:09<18:56:43, 3.66it/s] 33%|███▎ | 121525/371472 [9:40:09<19:11:33, 3.62it/s] 33%|███▎ | 121526/371472 [9:40:09<19:13:17, 3.61it/s] 33%|███▎ | 121527/371472 [9:40:10<18:56:16, 3.67it/s] 33%|███▎ | 121528/371472 [9:40:10<19:17:34, 3.60it/s] 33%|███▎ | 121529/371472 [9:40:10<19:25:06, 3.58it/s] 33%|███▎ | 121530/371472 [9:40:11<18:59:25, 3.66it/s] 33%|███▎ | 121531/371472 [9:40:11<18:16:17, 3.80it/s] 33%|███▎ | 121532/371472 [9:40:11<18:24:25, 3.77it/s] 33%|███▎ | 121533/371472 [9:40:11<18:36:50, 3.73it/s] 33%|███▎ | 121534/371472 [9:40:12<18:48:36, 3.69it/s] 33%|███▎ | 121535/371472 [9:40:12<18:24:54, 3.77it/s] 33%|███▎ | 121536/371472 [9:40:12<19:00:17, 3.65it/s] 33%|███▎ | 121537/371472 [9:40:12<18:59:38, 3.66it/s] 33%|███▎ | 121538/371472 [9:40:13<18:31:35, 3.75it/s] 33%|███▎ | 121539/371472 [9:40:13<19:02:26, 3.65it/s] 33%|███▎ | 121540/371472 [9:40:13<18:54:27, 3.67it/s] {'loss': 3.3309, 'learning_rate': 7.05859854769549e-07, 'epoch': 5.23} + 33%|███▎ | 121540/371472 [9:40:13<18:54:27, 3.67it/s] 33%|███▎ | 121541/371472 [9:40:14<20:34:48, 3.37it/s] 33%|███▎ | 121542/371472 [9:40:14<20:07:59, 3.45it/s] 33%|███▎ | 121543/371472 [9:40:14<19:59:12, 3.47it/s] 33%|███▎ | 121544/371472 [9:40:14<20:06:51, 3.45it/s] 33%|███▎ | 121545/371472 [9:40:15<20:54:58, 3.32it/s] 33%|███▎ | 121546/371472 [9:40:15<20:33:52, 3.38it/s] 33%|███▎ | 121547/371472 [9:40:15<20:21:20, 3.41it/s] 33%|███▎ | 121548/371472 [9:40:16<19:27:53, 3.57it/s] 33%|███▎ | 121549/371472 [9:40:16<19:16:14, 3.60it/s] 33%|███▎ | 121550/371472 [9:40:16<19:11:36, 3.62it/s] 33%|███▎ | 121551/371472 [9:40:16<19:28:39, 3.56it/s] 33%|███▎ | 121552/371472 [9:40:17<19:57:07, 3.48it/s] 33%|███▎ | 121553/371472 [9:40:17<18:53:06, 3.68it/s] 33%|███▎ | 121554/371472 [9:40:17<18:48:24, 3.69it/s] 33%|███▎ | 121555/371472 [9:40:18<19:46:12, 3.51it/s] 33%|███▎ | 121556/371472 [9:40:18<19:28:26, 3.56it/s] 33%|███▎ | 121557/371472 [9:40:18<18:36:41, 3.73it/s] 33%|███▎ | 121558/371472 [9:40:18<19:14:00, 3.61it/s] 33%|███▎ | 121559/371472 [9:40:19<18:48:59, 3.69it/s] 33%|███▎ | 121560/371472 [9:40:19<18:29:31, 3.75it/s] {'loss': 3.2571, 'learning_rate': 7.058113727940701e-07, 'epoch': 5.24} + 33%|███▎ | 121560/371472 [9:40:19<18:29:31, 3.75it/s] 33%|███▎ | 121561/371472 [9:40:19<19:12:48, 3.61it/s] 33%|███▎ | 121562/371472 [9:40:19<20:18:59, 3.42it/s] 33%|███▎ | 121563/371472 [9:40:20<19:16:13, 3.60it/s] 33%|███▎ | 121564/371472 [9:40:20<19:00:14, 3.65it/s] 33%|███▎ | 121565/371472 [9:40:20<20:02:17, 3.46it/s] 33%|███▎ | 121566/371472 [9:40:21<19:52:47, 3.49it/s] 33%|███▎ | 121567/371472 [9:40:21<19:31:55, 3.55it/s] 33%|███▎ | 121568/371472 [9:40:21<19:37:02, 3.54it/s] 33%|███▎ | 121569/371472 [9:40:21<19:18:27, 3.60it/s] 33%|███▎ | 121570/371472 [9:40:22<20:18:36, 3.42it/s] 33%|███▎ | 121571/371472 [9:40:22<19:32:58, 3.55it/s] 33%|███▎ | 121572/371472 [9:40:22<19:24:39, 3.58it/s] 33%|███▎ | 121573/371472 [9:40:23<19:13:15, 3.61it/s] 33%|███▎ | 121574/371472 [9:40:23<19:00:39, 3.65it/s] 33%|███▎ | 121575/371472 [9:40:23<19:23:10, 3.58it/s] 33%|███▎ | 121576/371472 [9:40:23<19:07:20, 3.63it/s] 33%|███▎ | 121577/371472 [9:40:24<19:06:00, 3.63it/s] 33%|███▎ | 121578/371472 [9:40:24<19:16:24, 3.60it/s] 33%|███▎ | 121579/371472 [9:40:24<19:08:49, 3.63it/s] 33%|███▎ | 121580/371472 [9:40:24<18:30:15, 3.75it/s] {'loss': 3.2318, 'learning_rate': 7.057628908185912e-07, 'epoch': 5.24} + 33%|███▎ | 121580/371472 [9:40:24<18:30:15, 3.75it/s] 33%|███▎ | 121581/371472 [9:40:25<18:32:54, 3.74it/s] 33%|███▎ | 121582/371472 [9:40:25<19:59:38, 3.47it/s] 33%|███▎ | 121583/371472 [9:40:25<18:56:48, 3.66it/s] 33%|███▎ | 121584/371472 [9:40:26<20:15:00, 3.43it/s] 33%|███▎ | 121585/371472 [9:40:26<21:01:59, 3.30it/s] 33%|███▎ | 121586/371472 [9:40:26<20:54:27, 3.32it/s] 33%|███▎ | 121587/371472 [9:40:27<20:24:05, 3.40it/s] 33%|███▎ | 121588/371472 [9:40:27<20:39:26, 3.36it/s] 33%|███▎ | 121589/371472 [9:40:27<20:06:38, 3.45it/s] 33%|███▎ | 121590/371472 [9:40:27<19:08:58, 3.62it/s] 33%|███▎ | 121591/371472 [9:40:28<19:09:50, 3.62it/s] 33%|███▎ | 121592/371472 [9:40:28<18:31:54, 3.75it/s] 33%|███▎ | 121593/371472 [9:40:28<18:39:53, 3.72it/s] 33%|███▎ | 121594/371472 [9:40:28<18:26:36, 3.76it/s] 33%|███▎ | 121595/371472 [9:40:29<18:24:23, 3.77it/s] 33%|███▎ | 121596/371472 [9:40:29<18:33:32, 3.74it/s] 33%|███▎ | 121597/371472 [9:40:29<18:36:47, 3.73it/s] 33%|███▎ | 121598/371472 [9:40:30<19:16:22, 3.60it/s] 33%|███▎ | 121599/371472 [9:40:30<19:04:16, 3.64it/s] 33%|███▎ | 121600/371472 [9:40:30<18:35:35, 3.73it/s] {'loss': 3.1048, 'learning_rate': 7.057144088431123e-07, 'epoch': 5.24} + 33%|███▎ | 121600/371472 [9:40:30<18:35:35, 3.73it/s] 33%|███▎ | 121601/371472 [9:40:30<19:37:20, 3.54it/s] 33%|███▎ | 121602/371472 [9:40:31<19:00:46, 3.65it/s] 33%|███▎ | 121603/371472 [9:40:31<19:07:38, 3.63it/s] 33%|███▎ | 121604/371472 [9:40:31<21:19:52, 3.25it/s] 33%|███▎ | 121605/371472 [9:40:32<21:52:56, 3.17it/s] 33%|███▎ | 121606/371472 [9:40:32<21:29:53, 3.23it/s] 33%|███▎ | 121607/371472 [9:40:32<19:59:12, 3.47it/s] 33%|███▎ | 121608/371472 [9:40:32<19:33:37, 3.55it/s] 33%|███▎ | 121609/371472 [9:40:33<19:35:42, 3.54it/s] 33%|███▎ | 121610/371472 [9:40:33<18:52:00, 3.68it/s] 33%|███▎ | 121611/371472 [9:40:33<18:36:15, 3.73it/s] 33%|███▎ | 121612/371472 [9:40:33<18:58:37, 3.66it/s] 33%|███▎ | 121613/371472 [9:40:34<20:13:42, 3.43it/s] 33%|███▎ | 121614/371472 [9:40:34<19:40:13, 3.53it/s] 33%|███▎ | 121615/371472 [9:40:34<18:47:58, 3.69it/s] 33%|███▎ | 121616/371472 [9:40:35<18:49:13, 3.69it/s] 33%|███▎ | 121617/371472 [9:40:35<18:42:04, 3.71it/s] 33%|███▎ | 121618/371472 [9:40:35<18:52:05, 3.68it/s] 33%|███▎ | 121619/371472 [9:40:35<18:41:08, 3.71it/s] 33%|███▎ | 121620/371472 [9:40:36<20:22:09, 3.41it/s] {'loss': 3.2561, 'learning_rate': 7.056659268676335e-07, 'epoch': 5.24} + 33%|███▎ | 121620/371472 [9:40:36<20:22:09, 3.41it/s] 33%|███▎ | 121621/371472 [9:40:36<19:32:31, 3.55it/s] 33%|███▎ | 121622/371472 [9:40:36<19:20:07, 3.59it/s] 33%|███▎ | 121623/371472 [9:40:37<20:53:58, 3.32it/s] 33%|███▎ | 121624/371472 [9:40:37<20:42:51, 3.35it/s] 33%|███▎ | 121625/371472 [9:40:37<20:50:37, 3.33it/s] 33%|███▎ | 121626/371472 [9:40:38<21:06:26, 3.29it/s] 33%|███▎ | 121627/371472 [9:40:38<21:21:47, 3.25it/s] 33%|███▎ | 121628/371472 [9:40:38<20:29:05, 3.39it/s] 33%|███▎ | 121629/371472 [9:40:38<21:00:07, 3.30it/s] 33%|███▎ | 121630/371472 [9:40:39<20:10:54, 3.44it/s] 33%|███▎ | 121631/371472 [9:40:39<19:15:50, 3.60it/s] 33%|███▎ | 121632/371472 [9:40:39<18:46:48, 3.70it/s] 33%|███▎ | 121633/371472 [9:40:39<18:05:56, 3.83it/s] 33%|███▎ | 121634/371472 [9:40:40<20:16:27, 3.42it/s] 33%|███▎ | 121635/371472 [9:40:40<19:56:36, 3.48it/s] 33%|███▎ | 121636/371472 [9:40:40<19:40:46, 3.53it/s] 33%|███▎ | 121637/371472 [9:40:41<19:32:30, 3.55it/s] 33%|███▎ | 121638/371472 [9:40:41<18:30:01, 3.75it/s] 33%|███▎ | 121639/371472 [9:40:41<17:54:59, 3.87it/s] 33%|███▎ | 121640/371472 [9:40:41<17:42:45, 3.92it/s] {'loss': 3.3285, 'learning_rate': 7.056174448921545e-07, 'epoch': 5.24} + 33%|███▎ | 121640/371472 [9:40:41<17:42:45, 3.92it/s] 33%|███▎ | 121641/371472 [9:40:42<19:46:42, 3.51it/s] 33%|███▎ | 121642/371472 [9:40:42<20:19:02, 3.42it/s] 33%|███▎ | 121643/371472 [9:40:42<19:41:47, 3.52it/s] 33%|███▎ | 121644/371472 [9:40:43<20:27:07, 3.39it/s] 33%|███▎ | 121645/371472 [9:40:43<20:21:09, 3.41it/s] 33%|███▎ | 121646/371472 [9:40:43<20:28:08, 3.39it/s] 33%|███▎ | 121647/371472 [9:40:44<20:46:38, 3.34it/s] 33%|███▎ | 121648/371472 [9:40:44<20:24:07, 3.40it/s] 33%|███▎ | 121649/371472 [9:40:44<19:39:30, 3.53it/s] 33%|███▎ | 121650/371472 [9:40:44<18:49:49, 3.69it/s] 33%|███▎ | 121651/371472 [9:40:45<19:39:36, 3.53it/s] 33%|███▎ | 121652/371472 [9:40:45<19:57:43, 3.48it/s] 33%|███▎ | 121653/371472 [9:40:45<20:32:40, 3.38it/s] 33%|███▎ | 121654/371472 [9:40:45<20:01:12, 3.47it/s] 33%|███▎ | 121655/371472 [9:40:46<19:31:38, 3.55it/s] 33%|███▎ | 121656/371472 [9:40:46<19:35:49, 3.54it/s] 33%|███▎ | 121657/371472 [9:40:46<19:42:50, 3.52it/s] 33%|███▎ | 121658/371472 [9:40:47<18:53:21, 3.67it/s] 33%|███▎ | 121659/371472 [9:40:47<20:24:18, 3.40it/s] 33%|███▎ | 121660/371472 [9:40:47<19:23:04, 3.58it/s] {'loss': 3.1627, 'learning_rate': 7.055689629166756e-07, 'epoch': 5.24} + 33%|███▎ | 121660/371472 [9:40:47<19:23:04, 3.58it/s] 33%|███▎ | 121661/371472 [9:40:47<19:10:55, 3.62it/s] 33%|███▎ | 121662/371472 [9:40:48<18:52:45, 3.68it/s] 33%|███▎ | 121663/371472 [9:40:48<18:48:40, 3.69it/s] 33%|███▎ | 121664/371472 [9:40:48<18:23:55, 3.77it/s] 33%|███▎ | 121665/371472 [9:40:48<18:10:23, 3.82it/s] 33%|███▎ | 121666/371472 [9:40:49<19:31:31, 3.55it/s] 33%|███▎ | 121667/371472 [9:40:49<19:29:14, 3.56it/s] 33%|███▎ | 121668/371472 [9:40:49<19:06:05, 3.63it/s] 33%|███▎ | 121669/371472 [9:40:50<18:57:19, 3.66it/s] 33%|███▎ | 121670/371472 [9:40:50<20:13:05, 3.43it/s] 33%|███▎ | 121671/371472 [9:40:50<20:11:46, 3.44it/s] 33%|███▎ | 121672/371472 [9:40:50<19:17:54, 3.60it/s] 33%|███▎ | 121673/371472 [9:40:51<18:25:58, 3.76it/s] 33%|███▎ | 121674/371472 [9:40:51<19:19:23, 3.59it/s] 33%|███▎ | 121675/371472 [9:40:51<18:34:48, 3.73it/s] 33%|███▎ | 121676/371472 [9:40:52<18:31:31, 3.75it/s] 33%|███▎ | 121677/371472 [9:40:52<18:49:01, 3.69it/s] 33%|███▎ | 121678/371472 [9:40:52<18:26:53, 3.76it/s] 33%|███▎ | 121679/371472 [9:40:52<18:13:01, 3.81it/s] 33%|███▎ | 121680/371472 [9:40:53<19:34:33, 3.54it/s] {'loss': 3.2619, 'learning_rate': 7.055204809411967e-07, 'epoch': 5.24} + 33%|███▎ | 121680/371472 [9:40:53<19:34:33, 3.54it/s] 33%|███▎ | 121681/371472 [9:40:53<19:54:59, 3.48it/s] 33%|███▎ | 121682/371472 [9:40:53<20:19:42, 3.41it/s] 33%|███▎ | 121683/371472 [9:40:53<19:09:52, 3.62it/s] 33%|███▎ | 121684/371472 [9:40:54<18:56:52, 3.66it/s] 33%|███▎ | 121685/371472 [9:40:54<18:37:35, 3.73it/s] 33%|███▎ | 121686/371472 [9:40:54<18:53:10, 3.67it/s] 33%|███▎ | 121687/371472 [9:40:55<18:05:22, 3.84it/s] 33%|███▎ | 121688/371472 [9:40:55<18:52:16, 3.68it/s] 33%|███▎ | 121689/371472 [9:40:55<18:33:14, 3.74it/s] 33%|███▎ | 121690/371472 [9:40:55<19:57:58, 3.48it/s] 33%|███▎ | 121691/371472 [9:40:56<21:44:16, 3.19it/s] 33%|███▎ | 121692/371472 [9:40:56<20:35:32, 3.37it/s] 33%|███▎ | 121693/371472 [9:40:56<19:31:24, 3.55it/s] 33%|███▎ | 121694/371472 [9:40:57<19:53:29, 3.49it/s] 33%|███▎ | 121695/371472 [9:40:57<19:09:54, 3.62it/s] 33%|███▎ | 121696/371472 [9:40:57<18:52:04, 3.68it/s] 33%|███▎ | 121697/371472 [9:40:57<19:53:06, 3.49it/s] 33%|███▎ | 121698/371472 [9:40:58<21:03:16, 3.30it/s] 33%|███▎ | 121699/371472 [9:40:58<20:24:48, 3.40it/s] 33%|███▎ | 121700/371472 [9:40:58<20:10:12, 3.44it/s] {'loss': 3.1182, 'learning_rate': 7.054719989657179e-07, 'epoch': 5.24} + 33%|███▎ | 121700/371472 [9:40:58<20:10:12, 3.44it/s] 33%|███▎ | 121701/371472 [9:40:59<20:32:11, 3.38it/s] 33%|███▎ | 121702/371472 [9:40:59<20:10:56, 3.44it/s] 33%|███▎ | 121703/371472 [9:40:59<23:08:56, 3.00it/s] 33%|███▎ | 121704/371472 [9:41:00<21:25:19, 3.24it/s] 33%|███▎ | 121705/371472 [9:41:00<21:22:28, 3.25it/s] 33%|███▎ | 121706/371472 [9:41:00<21:07:08, 3.29it/s] 33%|███▎ | 121707/371472 [9:41:00<20:55:54, 3.31it/s] 33%|███▎ | 121708/371472 [9:41:01<20:39:11, 3.36it/s] 33%|███▎ | 121709/371472 [9:41:01<19:52:52, 3.49it/s] 33%|███▎ | 121710/371472 [9:41:01<20:45:29, 3.34it/s] 33%|███▎ | 121711/371472 [9:41:02<21:05:05, 3.29it/s] 33%|███▎ | 121712/371472 [9:41:02<20:05:51, 3.45it/s] 33%|███▎ | 121713/371472 [9:41:02<19:45:01, 3.51it/s] 33%|███▎ | 121714/371472 [9:41:02<19:24:23, 3.57it/s] 33%|███▎ | 121715/371472 [9:41:03<18:49:44, 3.68it/s] 33%|███▎ | 121716/371472 [9:41:03<20:07:00, 3.45it/s] 33%|███▎ | 121717/371472 [9:41:03<20:09:59, 3.44it/s] 33%|███▎ | 121718/371472 [9:41:04<19:25:51, 3.57it/s] 33%|███▎ | 121719/371472 [9:41:04<18:48:20, 3.69it/s] 33%|███▎ | 121720/371472 [9:41:04<19:15:08, 3.60it/s] {'loss': 3.2971, 'learning_rate': 7.054235169902389e-07, 'epoch': 5.24} + 33%|███▎ | 121720/371472 [9:41:04<19:15:08, 3.60it/s] 33%|███▎ | 121721/371472 [9:41:04<18:56:06, 3.66it/s] 33%|███▎ | 121722/371472 [9:41:05<18:52:42, 3.67it/s] 33%|███▎ | 121723/371472 [9:41:05<19:31:15, 3.55it/s] 33%|███▎ | 121724/371472 [9:41:05<18:26:14, 3.76it/s] 33%|███▎ | 121725/371472 [9:41:06<18:49:06, 3.69it/s] 33%|███▎ | 121726/371472 [9:41:06<18:06:13, 3.83it/s] 33%|███▎ | 121727/371472 [9:41:06<19:23:31, 3.58it/s] 33%|███▎ | 121728/371472 [9:41:06<18:28:35, 3.75it/s] 33%|███▎ | 121729/371472 [9:41:07<18:48:00, 3.69it/s] 33%|███▎ | 121730/371472 [9:41:07<18:11:58, 3.81it/s] 33%|███▎ | 121731/371472 [9:41:07<18:08:06, 3.83it/s] 33%|███▎ | 121732/371472 [9:41:07<18:40:15, 3.72it/s] 33%|███▎ | 121733/371472 [9:41:08<19:23:06, 3.58it/s] 33%|███▎ | 121734/371472 [9:41:08<19:51:25, 3.49it/s] 33%|███▎ | 121735/371472 [9:41:08<20:19:29, 3.41it/s] 33%|███▎ | 121736/371472 [9:41:09<20:19:35, 3.41it/s] 33%|███▎ | 121737/371472 [9:41:09<20:41:35, 3.35it/s] 33%|███▎ | 121738/371472 [9:41:09<21:02:50, 3.30it/s] 33%|███▎ | 121739/371472 [9:41:09<20:15:19, 3.42it/s] 33%|███▎ | 121740/371472 [9:41:10<19:17:49, 3.59it/s] {'loss': 3.2285, 'learning_rate': 7.0537503501476e-07, 'epoch': 5.24} + 33%|███▎ | 121740/371472 [9:41:10<19:17:49, 3.59it/s] 33%|███▎ | 121741/371472 [9:41:10<19:17:50, 3.59it/s] 33%|███▎ | 121742/371472 [9:41:10<19:36:51, 3.54it/s] 33%|███▎ | 121743/371472 [9:41:11<19:03:19, 3.64it/s] 33%|███▎ | 121744/371472 [9:41:11<18:16:12, 3.80it/s] 33%|███▎ | 121745/371472 [9:41:11<18:02:40, 3.84it/s] 33%|███▎ | 121746/371472 [9:41:11<18:19:15, 3.79it/s] 33%|███▎ | 121747/371472 [9:41:12<19:17:57, 3.59it/s] 33%|███▎ | 121748/371472 [9:41:12<19:14:24, 3.61it/s] 33%|███▎ | 121749/371472 [9:41:12<19:25:32, 3.57it/s] 33%|███▎ | 121750/371472 [9:41:12<18:58:46, 3.65it/s] 33%|███▎ | 121751/371472 [9:41:13<19:03:07, 3.64it/s] 33%|███▎ | 121752/371472 [9:41:13<18:52:43, 3.67it/s] 33%|███▎ | 121753/371472 [9:41:13<19:25:00, 3.57it/s] 33%|███▎ | 121754/371472 [9:41:14<18:40:10, 3.72it/s] 33%|███▎ | 121755/371472 [9:41:14<18:03:31, 3.84it/s] 33%|███▎ | 121756/371472 [9:41:14<18:22:38, 3.77it/s] 33%|███▎ | 121757/371472 [9:41:14<19:12:46, 3.61it/s] 33%|███▎ | 121758/371472 [9:41:15<18:28:14, 3.76it/s] 33%|███▎ | 121759/371472 [9:41:15<18:26:26, 3.76it/s] 33%|███▎ | 121760/371472 [9:41:15<19:50:17, 3.50it/s] {'loss': 3.297, 'learning_rate': 7.053265530392812e-07, 'epoch': 5.24} + 33%|███▎ | 121760/371472 [9:41:15<19:50:17, 3.50it/s] 33%|███▎ | 121761/371472 [9:41:15<20:09:59, 3.44it/s] 33%|███▎ | 121762/371472 [9:41:16<20:11:45, 3.43it/s] 33%|███▎ | 121763/371472 [9:41:16<19:15:42, 3.60it/s] 33%|███▎ | 121764/371472 [9:41:16<19:29:32, 3.56it/s] 33%|███▎ | 121765/371472 [9:41:17<19:04:35, 3.64it/s] 33%|███▎ | 121766/371472 [9:41:17<18:34:45, 3.73it/s] 33%|███▎ | 121767/371472 [9:41:17<18:22:11, 3.78it/s] 33%|███▎ | 121768/371472 [9:41:17<18:14:51, 3.80it/s] 33%|███▎ | 121769/371472 [9:41:18<17:52:13, 3.88it/s] 33%|███▎ | 121770/371472 [9:41:18<18:10:35, 3.82it/s] 33%|███▎ | 121771/371472 [9:41:18<19:10:53, 3.62it/s] 33%|███▎ | 121772/371472 [9:41:19<23:06:18, 3.00it/s] 33%|███▎ | 121773/371472 [9:41:19<22:15:48, 3.12it/s] 33%|███▎ | 121774/371472 [9:41:19<21:23:41, 3.24it/s] 33%|███▎ | 121775/371472 [9:41:19<20:26:17, 3.39it/s] 33%|███▎ | 121776/371472 [9:41:20<20:08:52, 3.44it/s] 33%|███▎ | 121777/371472 [9:41:20<19:50:40, 3.50it/s] 33%|███▎ | 121778/371472 [9:41:20<19:30:54, 3.55it/s] 33%|███▎ | 121779/371472 [9:41:21<19:28:46, 3.56it/s] 33%|███▎ | 121780/371472 [9:41:21<18:23:04, 3.77it/s] {'loss': 3.1897, 'learning_rate': 7.052780710638022e-07, 'epoch': 5.25} + 33%|███▎ | 121780/371472 [9:41:21<18:23:04, 3.77it/s] 33%|███▎ | 121781/371472 [9:41:21<18:26:16, 3.76it/s] 33%|███▎ | 121782/371472 [9:41:21<18:29:31, 3.75it/s] 33%|███▎ | 121783/371472 [9:41:22<19:04:37, 3.64it/s] 33%|███▎ | 121784/371472 [9:41:22<19:45:05, 3.51it/s] 33%|███▎ | 121785/371472 [9:41:22<19:10:34, 3.62it/s] 33%|███▎ | 121786/371472 [9:41:22<19:04:47, 3.64it/s] 33%|███▎ | 121787/371472 [9:41:23<20:06:45, 3.45it/s] 33%|███▎ | 121788/371472 [9:41:23<19:48:35, 3.50it/s] 33%|███▎ | 121789/371472 [9:41:23<19:37:01, 3.54it/s] 33%|███▎ | 121790/371472 [9:41:24<20:21:17, 3.41it/s] 33%|███▎ | 121791/371472 [9:41:24<20:39:04, 3.36it/s] 33%|███▎ | 121792/371472 [9:41:24<19:37:54, 3.53it/s] 33%|███▎ | 121793/371472 [9:41:24<18:38:04, 3.72it/s] 33%|███▎ | 121794/371472 [9:41:25<18:42:50, 3.71it/s] 33%|███▎ | 121795/371472 [9:41:25<19:31:46, 3.55it/s] 33%|███▎ | 121796/371472 [9:41:25<19:43:20, 3.52it/s] 33%|███▎ | 121797/371472 [9:41:26<18:51:04, 3.68it/s] 33%|███▎ | 121798/371472 [9:41:26<18:20:43, 3.78it/s] 33%|███▎ | 121799/371472 [9:41:26<20:28:45, 3.39it/s] 33%|███▎ | 121800/371472 [9:41:26<19:52:37, 3.49it/s] {'loss': 3.4053, 'learning_rate': 7.052295890883233e-07, 'epoch': 5.25} + 33%|███▎ | 121800/371472 [9:41:26<19:52:37, 3.49it/s] 33%|███▎ | 121801/371472 [9:41:27<19:32:59, 3.55it/s] 33%|███▎ | 121802/371472 [9:41:27<19:32:05, 3.55it/s] 33%|███▎ | 121803/371472 [9:41:27<18:58:59, 3.65it/s] 33%|███▎ | 121804/371472 [9:41:28<18:54:59, 3.67it/s] 33%|███▎ | 121805/371472 [9:41:28<19:19:36, 3.59it/s] 33%|███▎ | 121806/371472 [9:41:28<18:53:14, 3.67it/s] 33%|███▎ | 121807/371472 [9:41:28<20:24:41, 3.40it/s] 33%|███▎ | 121808/371472 [9:41:29<20:07:28, 3.45it/s] 33%|███▎ | 121809/371472 [9:41:29<19:01:31, 3.65it/s] 33%|███▎ | 121810/371472 [9:41:29<19:55:12, 3.48it/s] 33%|███▎ | 121811/371472 [9:41:30<21:08:27, 3.28it/s] 33%|███▎ | 121812/371472 [9:41:30<20:32:41, 3.38it/s] 33%|███▎ | 121813/371472 [9:41:30<20:01:26, 3.46it/s] 33%|███▎ | 121814/371472 [9:41:30<20:34:37, 3.37it/s] 33%|███▎ | 121815/371472 [9:41:31<19:25:11, 3.57it/s] 33%|███▎ | 121816/371472 [9:41:31<19:04:53, 3.63it/s] 33%|███▎ | 121817/371472 [9:41:31<21:28:00, 3.23it/s] 33%|███▎ | 121818/371472 [9:41:32<20:58:51, 3.31it/s] 33%|███▎ | 121819/371472 [9:41:32<20:07:41, 3.45it/s] 33%|███▎ | 121820/371472 [9:41:32<19:12:05, 3.61it/s] {'loss': 3.2536, 'learning_rate': 7.051811071128444e-07, 'epoch': 5.25} + 33%|███▎ | 121820/371472 [9:41:32<19:12:05, 3.61it/s] 33%|███▎ | 121821/371472 [9:41:32<20:18:52, 3.41it/s] 33%|███▎ | 121822/371472 [9:41:33<20:26:43, 3.39it/s] 33%|███▎ | 121823/371472 [9:41:33<20:04:11, 3.46it/s] 33%|███▎ | 121824/371472 [9:41:33<20:38:51, 3.36it/s] 33%|███▎ | 121825/371472 [9:41:34<20:05:59, 3.45it/s] 33%|███▎ | 121826/371472 [9:41:34<19:26:58, 3.57it/s] 33%|███▎ | 121827/371472 [9:41:34<19:17:12, 3.60it/s] 33%|███▎ | 121828/371472 [9:41:34<18:47:18, 3.69it/s] 33%|███▎ | 121829/371472 [9:41:35<18:21:56, 3.78it/s] 33%|███▎ | 121830/371472 [9:41:35<18:25:43, 3.76it/s] 33%|███▎ | 121831/371472 [9:41:35<18:19:56, 3.78it/s] 33%|███▎ | 121832/371472 [9:41:35<18:26:29, 3.76it/s] 33%|███▎ | 121833/371472 [9:41:36<18:22:58, 3.77it/s] 33%|██��▎ | 121834/371472 [9:41:36<18:17:50, 3.79it/s] 33%|███▎ | 121835/371472 [9:41:36<19:31:23, 3.55it/s] 33%|███▎ | 121836/371472 [9:41:37<19:11:32, 3.61it/s] 33%|███▎ | 121837/371472 [9:41:37<18:39:00, 3.72it/s] 33%|███▎ | 121838/371472 [9:41:37<18:37:07, 3.72it/s] 33%|███▎ | 121839/371472 [9:41:37<18:53:01, 3.67it/s] 33%|███▎ | 121840/371472 [9:41:38<18:45:00, 3.70it/s] {'loss': 3.2782, 'learning_rate': 7.051326251373656e-07, 'epoch': 5.25} + 33%|███▎ | 121840/371472 [9:41:38<18:45:00, 3.70it/s] 33%|███▎ | 121841/371472 [9:41:38<18:45:17, 3.70it/s] 33%|███▎ | 121842/371472 [9:41:38<18:25:35, 3.76it/s] 33%|███▎ | 121843/371472 [9:41:38<18:02:17, 3.84it/s] 33%|███▎ | 121844/371472 [9:41:39<19:20:36, 3.58it/s] 33%|███▎ | 121845/371472 [9:41:39<18:58:01, 3.66it/s] 33%|███▎ | 121846/371472 [9:41:39<19:06:56, 3.63it/s] 33%|███▎ | 121847/371472 [9:41:40<19:32:32, 3.55it/s] 33%|███▎ | 121848/371472 [9:41:40<19:33:03, 3.55it/s] 33%|███▎ | 121849/371472 [9:41:40<20:42:29, 3.35it/s] 33%|███▎ | 121850/371472 [9:41:40<19:59:09, 3.47it/s] 33%|███▎ | 121851/371472 [9:41:41<20:36:01, 3.37it/s] 33%|███▎ | 121852/371472 [9:41:41<19:47:06, 3.50it/s] 33%|███▎ | 121853/371472 [9:41:41<21:01:14, 3.30it/s] 33%|███▎ | 121854/371472 [9:41:42<21:16:32, 3.26it/s] 33%|███▎ | 121855/371472 [9:41:42<20:59:40, 3.30it/s] 33%|███▎ | 121856/371472 [9:41:42<20:59:23, 3.30it/s] 33%|███▎ | 121857/371472 [9:41:43<20:29:40, 3.38it/s] 33%|███▎ | 121858/371472 [9:41:43<20:07:39, 3.44it/s] 33%|███▎ | 121859/371472 [9:41:43<19:11:49, 3.61it/s] 33%|███▎ | 121860/371472 [9:41:43<18:35:16, 3.73it/s] {'loss': 3.3838, 'learning_rate': 7.050841431618867e-07, 'epoch': 5.25} + 33%|███▎ | 121860/371472 [9:41:43<18:35:16, 3.73it/s] 33%|███▎ | 121861/371472 [9:41:44<18:28:43, 3.75it/s] 33%|███▎ | 121862/371472 [9:41:44<18:05:24, 3.83it/s] 33%|███▎ | 121863/371472 [9:41:44<20:01:31, 3.46it/s] 33%|███▎ | 121864/371472 [9:41:45<21:21:27, 3.25it/s] 33%|███▎ | 121865/371472 [9:41:45<21:12:52, 3.27it/s] 33%|███▎ | 121866/371472 [9:41:45<20:38:27, 3.36it/s] 33%|███▎ | 121867/371472 [9:41:45<19:36:04, 3.54it/s] 33%|███▎ | 121868/371472 [9:41:46<20:18:22, 3.41it/s] 33%|███▎ | 121869/371472 [9:41:46<21:09:24, 3.28it/s] 33%|███▎ | 121870/371472 [9:41:46<20:10:10, 3.44it/s] 33%|███▎ | 121871/371472 [9:41:47<19:58:13, 3.47it/s] 33%|███▎ | 121872/371472 [9:41:47<20:10:01, 3.44it/s] 33%|███▎ | 121873/371472 [9:41:47<19:04:26, 3.63it/s] 33%|███▎ | 121874/371472 [9:41:47<18:43:41, 3.70it/s] 33%|███▎ | 121875/371472 [9:41:48<18:46:16, 3.69it/s] 33%|███▎ | 121876/371472 [9:41:48<20:04:00, 3.46it/s] 33%|███▎ | 121877/371472 [9:41:48<20:12:47, 3.43it/s] 33%|███▎ | 121878/371472 [9:41:49<19:34:17, 3.54it/s] 33%|███▎ | 121879/371472 [9:41:49<19:33:56, 3.54it/s] 33%|███▎ | 121880/371472 [9:41:49<19:51:44, 3.49it/s] {'loss': 3.2346, 'learning_rate': 7.050356611864078e-07, 'epoch': 5.25} + 33%|███▎ | 121880/371472 [9:41:49<19:51:44, 3.49it/s] 33%|███▎ | 121881/371472 [9:41:49<19:29:41, 3.56it/s] 33%|███▎ | 121882/371472 [9:41:50<18:50:21, 3.68it/s] 33%|███▎ | 121883/371472 [9:41:50<19:07:05, 3.63it/s] 33%|███▎ | 121884/371472 [9:41:50<18:33:18, 3.74it/s] 33%|███▎ | 121885/371472 [9:41:50<18:44:32, 3.70it/s] 33%|███▎ | 121886/371472 [9:41:51<19:27:44, 3.56it/s] 33%|███▎ | 121887/371472 [9:41:51<19:45:25, 3.51it/s] 33%|███▎ | 121888/371472 [9:41:51<18:51:14, 3.68it/s] 33%|███▎ | 121889/371472 [9:41:52<18:32:43, 3.74it/s] 33%|███▎ | 121890/371472 [9:41:52<23:37:54, 2.93it/s] 33%|███▎ | 121891/371472 [9:41:52<22:35:49, 3.07it/s] 33%|███▎ | 121892/371472 [9:41:53<21:00:04, 3.30it/s] 33%|███▎ | 121893/371472 [9:41:53<19:39:50, 3.53it/s] 33%|███▎ | 121894/371472 [9:41:53<19:48:29, 3.50it/s] 33%|███▎ | 121895/371472 [9:41:53<19:07:46, 3.62it/s] 33%|███▎ | 121896/371472 [9:41:54<18:46:06, 3.69it/s] 33%|███▎ | 121897/371472 [9:41:54<18:20:49, 3.78it/s] 33%|███▎ | 121898/371472 [9:41:54<18:51:55, 3.67it/s] 33%|███▎ | 121899/371472 [9:41:54<18:24:14, 3.77it/s] 33%|███▎ | 121900/371472 [9:41:55<18:13:07, 3.81it/s] {'loss': 3.37, 'learning_rate': 7.049871792109288e-07, 'epoch': 5.25} + 33%|███▎ | 121900/371472 [9:41:55<18:13:07, 3.81it/s] 33%|███▎ | 121901/371472 [9:41:55<18:24:14, 3.77it/s] 33%|███▎ | 121902/371472 [9:41:55<18:46:29, 3.69it/s] 33%|███▎ | 121903/371472 [9:41:56<18:39:30, 3.72it/s] 33%|███▎ | 121904/371472 [9:41:56<19:57:23, 3.47it/s] 33%|███▎ | 121905/371472 [9:41:56<19:38:21, 3.53it/s] 33%|███▎ | 121906/371472 [9:41:56<20:49:26, 3.33it/s] 33%|███▎ | 121907/371472 [9:41:57<20:02:39, 3.46it/s] 33%|███▎ | 121908/371472 [9:41:57<19:30:28, 3.55it/s] 33%|███▎ | 121909/371472 [9:41:57<18:58:55, 3.65it/s] 33%|███▎ | 121910/371472 [9:41:58<18:48:33, 3.69it/s] 33%|███▎ | 121911/371472 [9:41:58<18:06:18, 3.83it/s] 33%|███▎ | 121912/371472 [9:41:58<19:08:19, 3.62it/s] 33%|███▎ | 121913/371472 [9:41:58<19:28:37, 3.56it/s] 33%|███▎ | 121914/371472 [9:41:59<19:33:16, 3.55it/s] 33%|███▎ | 121915/371472 [9:41:59<19:12:23, 3.61it/s] 33%|███▎ | 121916/371472 [9:41:59<18:38:49, 3.72it/s] 33%|███▎ | 121917/371472 [9:41:59<19:28:36, 3.56it/s] 33%|███▎ | 121918/371472 [9:42:00<19:39:07, 3.53it/s] 33%|███▎ | 121919/371472 [9:42:00<19:29:34, 3.56it/s] 33%|███▎ | 121920/371472 [9:42:00<18:38:09, 3.72it/s] {'loss': 3.2603, 'learning_rate': 7.0493869723545e-07, 'epoch': 5.25} + 33%|███▎ | 121920/371472 [9:42:00<18:38:09, 3.72it/s] 33%|███▎ | 121921/371472 [9:42:01<19:59:38, 3.47it/s] 33%|███▎ | 121922/371472 [9:42:01<19:16:07, 3.60it/s] 33%|███▎ | 121923/371472 [9:42:01<18:57:12, 3.66it/s] 33%|███▎ | 121924/371472 [9:42:01<18:44:49, 3.70it/s] 33%|███▎ | 121925/371472 [9:42:02<18:28:48, 3.75it/s] 33%|███▎ | 121926/371472 [9:42:02<18:17:22, 3.79it/s] 33%|███▎ | 121927/371472 [9:42:02<18:35:49, 3.73it/s] 33%|███▎ | 121928/371472 [9:42:02<18:58:55, 3.65it/s] 33%|███▎ | 121929/371472 [9:42:03<19:24:18, 3.57it/s] 33%|███▎ | 121930/371472 [9:42:03<18:35:14, 3.73it/s] 33%|███▎ | 121931/371472 [9:42:03<22:14:27, 3.12it/s] 33%|███▎ | 121932/371472 [9:42:04<23:24:33, 2.96it/s] 33%|███▎ | 121933/371472 [9:42:04<22:21:14, 3.10it/s] 33%|███▎ | 121934/371472 [9:42:04<22:25:53, 3.09it/s] 33%|███▎ | 121935/371472 [9:42:05<21:46:27, 3.18it/s] 33%|███▎ | 121936/371472 [9:42:05<21:10:43, 3.27it/s] 33%|███▎ | 121937/371472 [9:42:05<20:52:14, 3.32it/s] 33%|███▎ | 121938/371472 [9:42:06<20:15:05, 3.42it/s] 33%|███▎ | 121939/371472 [9:42:06<19:04:45, 3.63it/s] 33%|███▎ | 121940/371472 [9:42:06<18:28:04, 3.75it/s] {'loss': 3.1746, 'learning_rate': 7.048902152599711e-07, 'epoch': 5.25} + 33%|███▎ | 121940/371472 [9:42:06<18:28:04, 3.75it/s] 33%|███▎ | 121941/371472 [9:42:06<19:34:24, 3.54it/s] 33%|███▎ | 121942/371472 [9:42:07<21:03:17, 3.29it/s] 33%|███▎ | 121943/371472 [9:42:07<22:40:00, 3.06it/s] 33%|███▎ | 121944/371472 [9:42:07<21:33:23, 3.22it/s] 33%|███▎ | 121945/371472 [9:42:08<20:00:37, 3.46it/s] 33%|███▎ | 121946/371472 [9:42:08<20:52:14, 3.32it/s] 33%|███▎ | 121947/371472 [9:42:08<20:24:30, 3.40it/s] 33%|███▎ | 121948/371472 [9:42:08<19:57:19, 3.47it/s] 33%|███▎ | 121949/371472 [9:42:09<19:29:20, 3.56it/s] 33%|███▎ | 121950/371472 [9:42:09<19:02:36, 3.64it/s] 33%|███▎ | 121951/371472 [9:42:09<19:23:55, 3.57it/s] 33%|███▎ | 121952/371472 [9:42:10<19:31:53, 3.55it/s] 33%|███▎ | 121953/371472 [9:42:10<21:00:05, 3.30it/s] 33%|███▎ | 121954/371472 [9:42:10<21:24:12, 3.24it/s] 33%|███▎ | 121955/371472 [9:42:11<21:24:22, 3.24it/s] 33%|███▎ | 121956/371472 [9:42:11<19:42:57, 3.52it/s] 33%|███▎ | 121957/371472 [9:42:11<18:52:10, 3.67it/s] 33%|███▎ | 121958/371472 [9:42:11<19:19:56, 3.59it/s] 33%|███▎ | 121959/371472 [9:42:12<19:10:50, 3.61it/s] 33%|███▎ | 121960/371472 [9:42:12<19:18:02, 3.59it/s] {'loss': 3.1348, 'learning_rate': 7.048417332844922e-07, 'epoch': 5.25} + 33%|███▎ | 121960/371472 [9:42:12<19:18:02, 3.59it/s] 33%|███▎ | 121961/371472 [9:42:12<18:45:42, 3.69it/s] 33%|███▎ | 121962/371472 [9:42:12<19:02:16, 3.64it/s] 33%|███▎ | 121963/371472 [9:42:13<19:04:06, 3.63it/s] 33%|███▎ | 121964/371472 [9:42:13<18:19:08, 3.78it/s] 33%|███▎ | 121965/371472 [9:42:13<18:33:27, 3.73it/s] 33%|███▎ | 121966/371472 [9:42:14<20:59:08, 3.30it/s] 33%|███▎ | 121967/371472 [9:42:14<20:01:23, 3.46it/s] 33%|███▎ | 121968/371472 [9:42:14<19:44:43, 3.51it/s] 33%|███▎ | 121969/371472 [9:42:14<18:48:41, 3.68it/s] 33%|███▎ | 121970/371472 [9:42:15<19:01:45, 3.64it/s] 33%|███▎ | 121971/371472 [9:42:15<18:39:25, 3.71it/s] 33%|███▎ | 121972/371472 [9:42:15<18:05:22, 3.83it/s] 33%|███▎ | 121973/371472 [9:42:15<17:56:38, 3.86it/s] 33%|███▎ | 121974/371472 [9:42:16<17:55:07, 3.87it/s] 33%|███▎ | 121975/371472 [9:42:16<17:41:51, 3.92it/s] 33%|███▎ | 121976/371472 [9:42:16<17:35:02, 3.94it/s] 33%|███▎ | 121977/371472 [9:42:16<18:11:47, 3.81it/s] 33%|███▎ | 121978/371472 [9:42:17<17:49:46, 3.89it/s] 33%|███▎ | 121979/371472 [9:42:17<18:28:40, 3.75it/s] 33%|███▎ | 121980/371472 [9:42:17<17:52:36, 3.88it/s] {'loss': 3.409, 'learning_rate': 7.047932513090133e-07, 'epoch': 5.25} + 33%|███▎ | 121980/371472 [9:42:17<17:52:36, 3.88it/s] 33%|███▎ | 121981/371472 [9:42:18<19:35:03, 3.54it/s] 33%|███▎ | 121982/371472 [9:42:18<19:06:41, 3.63it/s] 33%|███▎ | 121983/371472 [9:42:18<19:46:31, 3.50it/s] 33%|███▎ | 121984/371472 [9:42:18<19:30:43, 3.55it/s] 33%|███▎ | 121985/371472 [9:42:19<20:02:06, 3.46it/s] 33%|███▎ | 121986/371472 [9:42:19<20:39:40, 3.35it/s] 33%|███▎ | 121987/371472 [9:42:19<20:21:41, 3.40it/s] 33%|███▎ | 121988/371472 [9:42:20<19:34:51, 3.54it/s] 33%|███▎ | 121989/371472 [9:42:20<21:22:10, 3.24it/s] 33%|███▎ | 121990/371472 [9:42:20<21:28:23, 3.23it/s] 33%|███▎ | 121991/371472 [9:42:21<20:14:53, 3.42it/s] 33%|███▎ | 121992/371472 [9:42:21<19:48:51, 3.50it/s] 33%|███▎ | 121993/371472 [9:42:21<19:42:51, 3.52it/s] 33%|███▎ | 121994/371472 [9:42:21<20:02:12, 3.46it/s] 33%|███▎ | 121995/371472 [9:42:22<19:03:48, 3.64it/s] 33%|███▎ | 121996/371472 [9:42:22<19:07:41, 3.62it/s] 33%|███▎ | 121997/371472 [9:42:22<18:18:03, 3.79it/s] 33%|███▎ | 121998/371472 [9:42:22<17:37:57, 3.93it/s] 33%|███▎ | 121999/371472 [9:42:23<18:28:40, 3.75it/s] 33%|███▎ | 122000/371472 [9:42:23<19:03:44, 3.64it/s] {'loss': 3.1956, 'learning_rate': 7.047447693335345e-07, 'epoch': 5.25} + 33%|███▎ | 122000/371472 [9:42:23<19:03:44, 3.64it/s] 33%|███▎ | 122001/371472 [9:42:23<18:30:38, 3.74it/s] 33%|███▎ | 122002/371472 [9:42:23<18:22:41, 3.77it/s] 33%|███▎ | 122003/371472 [9:42:24<18:39:52, 3.71it/s] 33%|███▎ | 122004/371472 [9:42:24<18:56:57, 3.66it/s] 33%|███▎ | 122005/371472 [9:42:24<19:52:01, 3.49it/s] 33%|███▎ | 122006/371472 [9:42:25<23:53:00, 2.90it/s] 33%|███▎ | 122007/371472 [9:42:25<21:37:27, 3.20it/s] 33%|███▎ | 122008/371472 [9:42:25<20:45:58, 3.34it/s] 33%|███▎ | 122009/371472 [9:42:26<21:07:37, 3.28it/s] 33%|███▎ | 122010/371472 [9:42:26<20:44:12, 3.34it/s] 33%|███▎ | 122011/371472 [9:42:26<19:28:59, 3.56it/s] 33%|███▎ | 122012/371472 [9:42:26<19:15:20, 3.60it/s] 33%|███▎ | 122013/371472 [9:42:27<19:15:31, 3.60it/s] 33%|███▎ | 122014/371472 [9:42:27<18:54:54, 3.66it/s] 33%|███▎ | 122015/371472 [9:42:27<18:40:43, 3.71it/s] 33%|███▎ | 122016/371472 [9:42:27<18:37:32, 3.72it/s] 33%|███▎ | 122017/371472 [9:42:28<19:13:50, 3.60it/s] 33%|███▎ | 122018/371472 [9:42:28<18:31:27, 3.74it/s] 33%|███▎ | 122019/371472 [9:42:28<18:51:48, 3.67it/s] 33%|███▎ | 122020/371472 [9:42:29<18:29:52, 3.75it/s] {'loss': 3.2303, 'learning_rate': 7.046962873580555e-07, 'epoch': 5.26} + 33%|███▎ | 122020/371472 [9:42:29<18:29:52, 3.75it/s] 33%|███▎ | 122021/371472 [9:42:29<18:42:26, 3.70it/s] 33%|███▎ | 122022/371472 [9:42:29<18:03:41, 3.84it/s] 33%|███▎ | 122023/371472 [9:42:29<17:47:04, 3.90it/s] 33%|███▎ | 122024/371472 [9:42:30<17:40:39, 3.92it/s] 33%|███▎ | 122025/371472 [9:42:30<18:37:52, 3.72it/s] 33%|███▎ | 122026/371472 [9:42:30<21:03:38, 3.29it/s] 33%|███▎ | 122027/371472 [9:42:31<21:08:19, 3.28it/s] 33%|███▎ | 122028/371472 [9:42:31<20:21:20, 3.40it/s] 33%|███▎ | 122029/371472 [9:42:31<19:19:50, 3.58it/s] 33%|███▎ | 122030/371472 [9:42:31<20:22:21, 3.40it/s] 33%|███▎ | 122031/371472 [9:42:32<19:37:01, 3.53it/s] 33%|███▎ | 122032/371472 [9:42:32<19:55:11, 3.48it/s] 33%|███▎ | 122033/371472 [9:42:32<19:21:56, 3.58it/s] 33%|███▎ | 122034/371472 [9:42:33<19:53:49, 3.48it/s] 33%|███▎ | 122035/371472 [9:42:33<20:45:46, 3.34it/s] 33%|███▎ | 122036/371472 [9:42:33<19:37:31, 3.53it/s] 33%|███▎ | 122037/371472 [9:42:33<19:11:13, 3.61it/s] 33%|███▎ | 122038/371472 [9:42:34<18:47:05, 3.69it/s] 33%|███▎ | 122039/371472 [9:42:34<19:50:30, 3.49it/s] 33%|███▎ | 122040/371472 [9:42:34<19:11:45, 3.61it/s] {'loss': 3.3265, 'learning_rate': 7.046478053825766e-07, 'epoch': 5.26} + 33%|███▎ | 122040/371472 [9:42:34<19:11:45, 3.61it/s] 33%|███▎ | 122041/371472 [9:42:34<19:06:01, 3.63it/s] 33%|███▎ | 122042/371472 [9:42:35<18:46:09, 3.69it/s] 33%|███▎ | 122043/371472 [9:42:35<18:48:27, 3.68it/s] 33%|███▎ | 122044/371472 [9:42:35<18:17:52, 3.79it/s] 33%|███▎ | 122045/371472 [9:42:36<18:50:57, 3.68it/s] 33%|███▎ | 122046/371472 [9:42:36<18:44:25, 3.70it/s] 33%|███▎ | 122047/371472 [9:42:36<19:05:07, 3.63it/s] 33%|███▎ | 122048/371472 [9:42:36<18:48:56, 3.68it/s] 33%|███▎ | 122049/371472 [9:42:37<19:06:18, 3.63it/s] 33%|███▎ | 122050/371472 [9:42:37<19:04:53, 3.63it/s] 33%|███▎ | 122051/371472 [9:42:37<18:45:05, 3.69it/s] 33%|███▎ | 122052/371472 [9:42:37<18:28:20, 3.75it/s] 33%|███▎ | 122053/371472 [9:42:38<18:01:48, 3.84it/s] 33%|███▎ | 122054/371472 [9:42:38<17:39:31, 3.92it/s] 33%|███▎ | 122055/371472 [9:42:38<20:07:22, 3.44it/s] 33%|███▎ | 122056/371472 [9:42:39<20:25:56, 3.39it/s] 33%|███▎ | 122057/371472 [9:42:39<19:45:28, 3.51it/s] 33%|███▎ | 122058/371472 [9:42:39<23:14:32, 2.98it/s] 33%|███▎ | 122059/371472 [9:42:40<21:45:17, 3.18it/s] 33%|███▎ | 122060/371472 [9:42:40<20:51:57, 3.32it/s] {'loss': 3.3418, 'learning_rate': 7.045993234070977e-07, 'epoch': 5.26} + 33%|███▎ | 122060/371472 [9:42:40<20:51:57, 3.32it/s] 33%|███▎ | 122061/371472 [9:42:40<20:16:46, 3.42it/s] 33%|███▎ | 122062/371472 [9:42:40<20:06:13, 3.45it/s] 33%|███▎ | 122063/371472 [9:42:41<19:47:07, 3.50it/s] 33%|███▎ | 122064/371472 [9:42:41<20:06:11, 3.45it/s] 33%|███▎ | 122065/371472 [9:42:41<19:21:15, 3.58it/s] 33%|███▎ | 122066/371472 [9:42:42<20:54:52, 3.31it/s] 33%|███▎ | 122067/371472 [9:42:42<20:01:31, 3.46it/s] 33%|███▎ | 122068/371472 [9:42:42<19:25:30, 3.57it/s] 33%|███▎ | 122069/371472 [9:42:43<22:13:20, 3.12it/s] 33%|███▎ | 122070/371472 [9:42:43<21:59:02, 3.15it/s] 33%|███▎ | 122071/371472 [9:42:43<21:42:11, 3.19it/s] 33%|███▎ | 122072/371472 [9:42:44<22:33:04, 3.07it/s] 33%|███▎ | 122073/371472 [9:42:44<22:25:09, 3.09it/s] 33%|███▎ | 122074/371472 [9:42:44<22:10:10, 3.12it/s] 33%|███▎ | 122075/371472 [9:42:44<22:40:35, 3.05it/s] 33%|███▎ | 122076/371472 [9:42:45<21:35:30, 3.21it/s] 33%|███▎ | 122077/371472 [9:42:45<20:26:10, 3.39it/s] 33%|███▎ | 122078/371472 [9:42:45<19:49:53, 3.49it/s] 33%|███▎ | 122079/371472 [9:42:46<19:35:06, 3.54it/s] 33%|███▎ | 122080/371472 [9:42:46<19:48:03, 3.50it/s] {'loss': 3.3365, 'learning_rate': 7.045508414316188e-07, 'epoch': 5.26} + 33%|███▎ | 122080/371472 [9:42:46<19:48:03, 3.50it/s] 33%|███▎ | 122081/371472 [9:42:46<19:51:52, 3.49it/s] 33%|███▎ | 122082/371472 [9:42:46<19:05:48, 3.63it/s] 33%|███▎ | 122083/371472 [9:42:47<18:50:04, 3.68it/s] 33%|███▎ | 122084/371472 [9:42:47<19:34:19, 3.54it/s] 33%|███▎ | 122085/371472 [9:42:47<19:58:00, 3.47it/s] 33%|███▎ | 122086/371472 [9:42:48<19:23:41, 3.57it/s] 33%|███▎ | 122087/371472 [9:42:48<19:20:49, 3.58it/s] 33%|███▎ | 122088/371472 [9:42:48<18:41:53, 3.70it/s] 33%|███▎ | 122089/371472 [9:42:48<18:26:01, 3.76it/s] 33%|███▎ | 122090/371472 [9:42:49<18:43:13, 3.70it/s] 33%|███▎ | 122091/371472 [9:42:49<18:43:35, 3.70it/s] 33%|███▎ | 122092/371472 [9:42:49<18:43:38, 3.70it/s] 33%|███▎ | 122093/371472 [9:42:49<18:14:33, 3.80it/s] 33%|███▎ | 122094/371472 [9:42:50<18:41:31, 3.71it/s] 33%|███▎ | 122095/371472 [9:42:50<18:24:02, 3.76it/s] 33%|███▎ | 122096/371472 [9:42:50<18:11:39, 3.81it/s] 33%|███▎ | 122097/371472 [9:42:50<17:55:14, 3.87it/s] 33%|███▎ | 122098/371472 [9:42:51<17:46:10, 3.90it/s] 33%|███▎ | 122099/371472 [9:42:51<18:41:15, 3.71it/s] 33%|███▎ | 122100/371472 [9:42:51<18:25:34, 3.76it/s] {'loss': 3.4789, 'learning_rate': 7.0450235945614e-07, 'epoch': 5.26} + 33%|███▎ | 122100/371472 [9:42:51<18:25:34, 3.76it/s] 33%|███▎ | 122101/371472 [9:42:52<18:34:44, 3.73it/s] 33%|███▎ | 122102/371472 [9:42:52<18:20:38, 3.78it/s] 33%|███▎ | 122103/371472 [9:42:52<21:01:53, 3.29it/s] 33%|███▎ | 122104/371472 [9:42:53<23:09:59, 2.99it/s] 33%|███▎ | 122105/371472 [9:42:53<22:38:29, 3.06it/s] 33%|███▎ | 122106/371472 [9:42:53<21:34:03, 3.21it/s] 33%|███▎ | 122107/371472 [9:42:53<22:00:33, 3.15it/s] 33%|███▎ | 122108/371472 [9:42:54<21:06:26, 3.28it/s] 33%|███▎ | 122109/371472 [9:42:54<19:53:19, 3.48it/s] 33%|███▎ | 122110/371472 [9:42:54<20:05:26, 3.45it/s] 33%|███▎ | 122111/371472 [9:42:55<19:22:12, 3.58it/s] 33%|███▎ | 122112/371472 [9:42:55<19:26:32, 3.56it/s] 33%|███▎ | 122113/371472 [9:42:55<19:12:49, 3.61it/s] 33%|███▎ | 122114/371472 [9:42:56<22:16:34, 3.11it/s] 33%|███▎ | 122115/371472 [9:42:56<21:37:37, 3.20it/s] 33%|███▎ | 122116/371472 [9:42:56<20:05:11, 3.45it/s] 33%|███▎ | 122117/371472 [9:42:56<19:09:27, 3.62it/s] 33%|███▎ | 122118/371472 [9:42:57<19:04:00, 3.63it/s] 33%|███▎ | 122119/371472 [9:42:57<19:59:42, 3.46it/s] 33%|███▎ | 122120/371472 [9:42:57<20:32:18, 3.37it/s] {'loss': 3.1382, 'learning_rate': 7.044538774806611e-07, 'epoch': 5.26} + 33%|███▎ | 122120/371472 [9:42:57<20:32:18, 3.37it/s] 33%|███▎ | 122121/371472 [9:42:57<19:20:08, 3.58it/s] 33%|███▎ | 122122/371472 [9:42:58<20:05:01, 3.45it/s] 33%|███▎ | 122123/371472 [9:42:58<20:09:57, 3.43it/s] 33%|███▎ | 122124/371472 [9:42:58<20:00:20, 3.46it/s] 33%|███▎ | 122125/371472 [9:42:59<19:57:34, 3.47it/s] 33%|███▎ | 122126/371472 [9:42:59<19:29:56, 3.55it/s] 33%|███▎ | 122127/371472 [9:42:59<20:12:21, 3.43it/s] 33%|███▎ | 122128/371472 [9:42:59<19:43:02, 3.51it/s] 33%|███▎ | 122129/371472 [9:43:00<20:33:12, 3.37it/s] 33%|███▎ | 122130/371472 [9:43:00<20:34:24, 3.37it/s] 33%|███▎ | 122131/371472 [9:43:00<20:18:24, 3.41it/s] 33%|███▎ | 122132/371472 [9:43:01<19:52:56, 3.48it/s] 33%|███▎ | 122133/371472 [9:43:01<19:01:26, 3.64it/s] 33%|███▎ | 122134/371472 [9:43:01<20:04:31, 3.45it/s] 33%|███▎ | 122135/371472 [9:43:02<22:26:10, 3.09it/s] 33%|███▎ | 122136/371472 [9:43:02<21:34:02, 3.21it/s] 33%|███▎ | 122137/371472 [9:43:02<20:50:03, 3.32it/s] 33%|███▎ | 122138/371472 [9:43:02<20:14:02, 3.42it/s] 33%|███▎ | 122139/371472 [9:43:03<21:02:13, 3.29it/s] 33%|███▎ | 122140/371472 [9:43:03<21:04:55, 3.29it/s] {'loss': 3.4152, 'learning_rate': 7.044053955051822e-07, 'epoch': 5.26} + 33%|███▎ | 122140/371472 [9:43:03<21:04:55, 3.29it/s] 33%|███▎ | 122141/371472 [9:43:03<20:14:35, 3.42it/s] 33%|███▎ | 122142/371472 [9:43:04<19:27:56, 3.56it/s] 33%|███▎ | 122143/371472 [9:43:04<19:49:12, 3.49it/s] 33%|███▎ | 122144/371472 [9:43:04<19:36:37, 3.53it/s] 33%|███▎ | 122145/371472 [9:43:05<20:23:11, 3.40it/s] 33%|███▎ | 122146/371472 [9:43:05<19:27:31, 3.56it/s] 33%|███▎ | 122147/371472 [9:43:05<19:18:58, 3.59it/s] 33%|███▎ | 122148/371472 [9:43:05<20:10:03, 3.43it/s] 33%|███▎ | 122149/371472 [9:43:06<21:35:22, 3.21it/s] 33%|███▎ | 122150/371472 [9:43:06<20:44:55, 3.34it/s] 33%|███▎ | 122151/371472 [9:43:06<19:25:11, 3.57it/s] 33%|███▎ | 122152/371472 [9:43:07<20:41:27, 3.35it/s] 33%|███▎ | 122153/371472 [9:43:07<20:35:33, 3.36it/s] 33%|███▎ | 122154/371472 [9:43:07<20:04:12, 3.45it/s] 33%|███▎ | 122155/371472 [9:43:07<19:27:50, 3.56it/s] 33%|███▎ | 122156/371472 [9:43:08<18:56:46, 3.66it/s] 33%|███▎ | 122157/371472 [9:43:08<18:52:01, 3.67it/s] 33%|███▎ | 122158/371472 [9:43:08<18:46:00, 3.69it/s] 33%|███▎ | 122159/371472 [9:43:08<18:26:14, 3.76it/s] 33%|███▎ | 122160/371472 [9:43:09<17:58:30, 3.85it/s] {'loss': 3.4156, 'learning_rate': 7.043569135297032e-07, 'epoch': 5.26} + 33%|███▎ | 122160/371472 [9:43:09<17:58:30, 3.85it/s] 33%|███▎ | 122161/371472 [9:43:09<18:01:58, 3.84it/s] 33%|███▎ | 122162/371472 [9:43:09<18:33:31, 3.73it/s] 33%|███▎ | 122163/371472 [9:43:09<18:19:09, 3.78it/s] 33%|███▎ | 122164/371472 [9:43:10<18:12:02, 3.80it/s] 33%|███▎ | 122165/371472 [9:43:10<17:39:16, 3.92it/s] 33%|███▎ | 122166/371472 [9:43:10<18:46:45, 3.69it/s] 33%|███▎ | 122167/371472 [9:43:11<18:48:14, 3.68it/s] 33%|███▎ | 122168/371472 [9:43:11<19:28:37, 3.56it/s] 33%|███▎ | 122169/371472 [9:43:11<20:14:29, 3.42it/s] 33%|███▎ | 122170/371472 [9:43:11<19:26:47, 3.56it/s] 33%|███▎ | 122171/371472 [9:43:12<19:00:31, 3.64it/s] 33%|███▎ | 122172/371472 [9:43:12<21:16:21, 3.26it/s] 33%|███▎ | 122173/371472 [9:43:12<21:35:20, 3.21it/s] 33%|███▎ | 122174/371472 [9:43:13<20:00:04, 3.46it/s] 33%|███▎ | 122175/371472 [9:43:13<20:41:04, 3.35it/s] 33%|███▎ | 122176/371472 [9:43:13<19:40:01, 3.52it/s] 33%|███▎ | 122177/371472 [9:43:13<19:29:51, 3.55it/s] 33%|███▎ | 122178/371472 [9:43:14<18:49:26, 3.68it/s] 33%|███▎ | 122179/371472 [9:43:14<18:27:44, 3.75it/s] 33%|███▎ | 122180/371472 [9:43:14<18:36:48, 3.72it/s] {'loss': 3.3359, 'learning_rate': 7.043084315542244e-07, 'epoch': 5.26} + 33%|███▎ | 122180/371472 [9:43:14<18:36:48, 3.72it/s] 33%|███▎ | 122181/371472 [9:43:15<18:05:57, 3.83it/s] 33%|███▎ | 122182/371472 [9:43:15<20:00:30, 3.46it/s] 33%|███▎ | 122183/371472 [9:43:15<19:53:51, 3.48it/s] 33%|███▎ | 122184/371472 [9:43:15<20:06:41, 3.44it/s] 33%|███▎ | 122185/371472 [9:43:16<19:45:01, 3.51it/s] 33%|███▎ | 122186/371472 [9:43:16<19:09:03, 3.62it/s] 33%|███▎ | 122187/371472 [9:43:16<19:18:02, 3.59it/s] 33%|███▎ | 122188/371472 [9:43:17<19:08:14, 3.62it/s] 33%|███▎ | 122189/371472 [9:43:17<19:07:39, 3.62it/s] 33%|███▎ | 122190/371472 [9:43:17<19:50:49, 3.49it/s] 33%|███▎ | 122191/371472 [9:43:17<20:13:26, 3.42it/s] 33%|███▎ | 122192/371472 [9:43:18<20:33:13, 3.37it/s] 33%|███▎ | 122193/371472 [9:43:18<21:29:36, 3.22it/s] 33%|███▎ | 122194/371472 [9:43:18<20:46:44, 3.33it/s] 33%|███▎ | 122195/371472 [9:43:19<20:27:21, 3.39it/s] 33%|███▎ | 122196/371472 [9:43:19<19:21:54, 3.58it/s] 33%|███▎ | 122197/371472 [9:43:19<21:13:07, 3.26it/s] 33%|███▎ | 122198/371472 [9:43:20<20:13:03, 3.42it/s] 33%|███▎ | 122199/371472 [9:43:20<18:55:39, 3.66it/s] 33%|███▎ | 122200/371472 [9:43:20<19:03:07, 3.63it/s] {'loss': 3.2836, 'learning_rate': 7.042599495787455e-07, 'epoch': 5.26} + 33%|███▎ | 122200/371472 [9:43:20<19:03:07, 3.63it/s] 33%|███▎ | 122201/371472 [9:43:20<19:33:01, 3.54it/s] 33%|███▎ | 122202/371472 [9:43:21<19:39:11, 3.52it/s] 33%|███▎ | 122203/371472 [9:43:21<18:51:09, 3.67it/s] 33%|███▎ | 122204/371472 [9:43:21<19:33:17, 3.54it/s] 33%|███▎ | 122205/371472 [9:43:21<19:27:31, 3.56it/s] 33%|███▎ | 122206/371472 [9:43:22<20:50:01, 3.32it/s] 33%|███▎ | 122207/371472 [9:43:22<20:54:53, 3.31it/s] 33%|███▎ | 122208/371472 [9:43:22<20:06:18, 3.44it/s] 33%|███▎ | 122209/371472 [9:43:23<20:28:50, 3.38it/s] 33%|███▎ | 122210/371472 [9:43:23<19:33:37, 3.54it/s] 33%|███▎ | 122211/371472 [9:43:23<18:39:11, 3.71it/s] 33%|███▎ | 122212/371472 [9:43:23<18:39:51, 3.71it/s] 33%|███▎ | 122213/371472 [9:43:24<19:06:25, 3.62it/s] 33%|███▎ | 122214/371472 [9:43:24<18:25:53, 3.76it/s] 33%|███▎ | 122215/371472 [9:43:24<18:00:34, 3.84it/s] 33%|███▎ | 122216/371472 [9:43:24<18:39:46, 3.71it/s] 33%|███▎ | 122217/371472 [9:43:25<20:07:30, 3.44it/s] 33%|███▎ | 122218/371472 [9:43:25<19:12:40, 3.60it/s] 33%|███▎ | 122219/371472 [9:43:25<19:11:19, 3.61it/s] 33%|███▎ | 122220/371472 [9:43:26<18:26:37, 3.75it/s] {'loss': 3.3352, 'learning_rate': 7.042114676032666e-07, 'epoch': 5.26} + 33%|███▎ | 122220/371472 [9:43:26<18:26:37, 3.75it/s] 33%|███▎ | 122221/371472 [9:43:26<18:31:52, 3.74it/s] 33%|███▎ | 122222/371472 [9:43:26<18:31:58, 3.74it/s] 33%|███▎ | 122223/371472 [9:43:26<19:04:21, 3.63it/s] 33%|███▎ | 122224/371472 [9:43:27<19:03:15, 3.63it/s] 33%|███▎ | 122225/371472 [9:43:27<18:09:10, 3.81it/s] 33%|███▎ | 122226/371472 [9:43:27<19:10:53, 3.61it/s] 33%|███▎ | 122227/371472 [9:43:28<20:43:06, 3.34it/s] 33%|███▎ | 122228/371472 [9:43:28<20:01:11, 3.46it/s] 33%|███▎ | 122229/371472 [9:43:28<20:50:47, 3.32it/s] 33%|███▎ | 122230/371472 [9:43:28<20:03:45, 3.45it/s] 33%|███▎ | 122231/371472 [9:43:29<18:58:13, 3.65it/s] 33%|███▎ | 122232/371472 [9:43:29<18:26:23, 3.75it/s] 33%|███▎ | 122233/371472 [9:43:29<18:13:08, 3.80it/s] 33%|███▎ | 122234/371472 [9:43:29<18:15:52, 3.79it/s] 33%|███▎ | 122235/371472 [9:43:30<18:03:21, 3.83it/s] 33%|███▎ | 122236/371472 [9:43:30<18:06:08, 3.82it/s] 33%|███▎ | 122237/371472 [9:43:30<17:39:04, 3.92it/s] 33%|███▎ | 122238/371472 [9:43:30<17:46:53, 3.89it/s] 33%|███▎ | 122239/371472 [9:43:31<17:50:01, 3.88it/s] 33%|███▎ | 122240/371472 [9:43:31<18:25:28, 3.76it/s] {'loss': 3.1764, 'learning_rate': 7.041629856277877e-07, 'epoch': 5.27} + 33%|███▎ | 122240/371472 [9:43:31<18:25:28, 3.76it/s] 33%|███▎ | 122241/371472 [9:43:31<18:15:58, 3.79it/s] 33%|███▎ | 122242/371472 [9:43:32<18:02:11, 3.84it/s] 33%|███▎ | 122243/371472 [9:43:32<17:42:35, 3.91it/s] 33%|███▎ | 122244/371472 [9:43:32<17:56:29, 3.86it/s] 33%|███▎ | 122245/371472 [9:43:32<17:53:56, 3.87it/s] 33%|███▎ | 122246/371472 [9:43:33<21:09:08, 3.27it/s] 33%|███▎ | 122247/371472 [9:43:33<19:52:57, 3.48it/s] 33%|███▎ | 122248/371472 [9:43:33<19:00:19, 3.64it/s] 33%|███▎ | 122249/371472 [9:43:33<18:21:54, 3.77it/s] 33%|███▎ | 122250/371472 [9:43:34<18:53:45, 3.66it/s] 33%|███▎ | 122251/371472 [9:43:34<19:23:34, 3.57it/s] 33%|███▎ | 122252/371472 [9:43:34<18:55:43, 3.66it/s] 33%|███▎ | 122253/371472 [9:43:35<18:53:54, 3.66it/s] 33%|███▎ | 122254/371472 [9:43:35<19:15:32, 3.59it/s] 33%|███▎ | 122255/371472 [9:43:35<20:22:29, 3.40it/s] 33%|███▎ | 122256/371472 [9:43:35<19:38:38, 3.52it/s] 33%|███▎ | 122257/371472 [9:43:36<21:02:43, 3.29it/s] 33%|███▎ | 122258/371472 [9:43:36<20:03:40, 3.45it/s] 33%|███▎ | 122259/371472 [9:43:36<19:04:07, 3.63it/s] 33%|███▎ | 122260/371472 [9:43:37<19:06:25, 3.62it/s] {'loss': 3.2883, 'learning_rate': 7.041145036523088e-07, 'epoch': 5.27} + 33%|███▎ | 122260/371472 [9:43:37<19:06:25, 3.62it/s] 33%|███▎ | 122261/371472 [9:43:37<18:06:57, 3.82it/s] 33%|███▎ | 122262/371472 [9:43:37<17:31:33, 3.95it/s] 33%|███▎ | 122263/371472 [9:43:37<17:34:09, 3.94it/s] 33%|███▎ | 122264/371472 [9:43:38<18:28:06, 3.75it/s] 33%|███▎ | 122265/371472 [9:43:38<19:04:08, 3.63it/s] 33%|███▎ | 122266/371472 [9:43:38<19:23:32, 3.57it/s] 33%|███▎ | 122267/371472 [9:43:38<19:22:34, 3.57it/s] 33%|███▎ | 122268/371472 [9:43:39<19:21:07, 3.58it/s] 33%|███▎ | 122269/371472 [9:43:39<18:46:34, 3.69it/s] 33%|███▎ | 122270/371472 [9:43:39<19:33:16, 3.54it/s] 33%|███▎ | 122271/371472 [9:43:40<19:31:59, 3.54it/s] 33%|███▎ | 122272/371472 [9:43:40<18:31:31, 3.74it/s] 33%|███▎ | 122273/371472 [9:43:40<18:25:59, 3.76it/s] 33%|███▎ | 122274/371472 [9:43:40<18:57:08, 3.65it/s] 33%|███▎ | 122275/371472 [9:43:41<18:46:16, 3.69it/s] 33%|███▎ | 122276/371472 [9:43:41<19:15:53, 3.59it/s] 33%|███▎ | 122277/371472 [9:43:41<19:24:11, 3.57it/s] 33%|███▎ | 122278/371472 [9:43:41<18:54:28, 3.66it/s] 33%|███▎ | 122279/371472 [9:43:42<20:11:19, 3.43it/s] 33%|███▎ | 122280/371472 [9:43:42<19:30:27, 3.55it/s] {'loss': 3.399, 'learning_rate': 7.040660216768298e-07, 'epoch': 5.27} + 33%|███▎ | 122280/371472 [9:43:42<19:30:27, 3.55it/s] 33%|███▎ | 122281/371472 [9:43:42<20:24:08, 3.39it/s] 33%|███▎ | 122282/371472 [9:43:43<21:36:01, 3.20it/s] 33%|███▎ | 122283/371472 [9:43:43<20:45:46, 3.33it/s] 33%|███▎ | 122284/371472 [9:43:43<19:24:19, 3.57it/s] 33%|███▎ | 122285/371472 [9:43:44<19:15:24, 3.59it/s] 33%|███▎ | 122286/371472 [9:43:44<21:04:12, 3.29it/s] 33%|███▎ | 122287/371472 [9:43:44<21:08:56, 3.27it/s] 33%|███▎ | 122288/371472 [9:43:44<20:07:45, 3.44it/s] 33%|███▎ | 122289/371472 [9:43:45<19:09:45, 3.61it/s] 33%|███▎ | 122290/371472 [9:43:45<19:47:14, 3.50it/s] 33%|███▎ | 122291/371472 [9:43:45<19:32:51, 3.54it/s] 33%|███▎ | 122292/371472 [9:43:46<19:49:06, 3.49it/s] 33%|███▎ | 122293/371472 [9:43:46<19:29:50, 3.55it/s] 33%|███▎ | 122294/371472 [9:43:46<20:31:17, 3.37it/s] 33%|███▎ | 122295/371472 [9:43:46<21:00:08, 3.30it/s] 33%|███▎ | 122296/371472 [9:43:47<20:34:05, 3.37it/s] 33%|███▎ | 122297/371472 [9:43:47<21:18:20, 3.25it/s] 33%|███▎ | 122298/371472 [9:43:47<21:56:13, 3.16it/s] 33%|███▎ | 122299/371472 [9:43:48<20:16:55, 3.41it/s] 33%|███▎ | 122300/371472 [9:43:48<19:08:35, 3.62it/s] {'loss': 3.263, 'learning_rate': 7.04017539701351e-07, 'epoch': 5.27} + 33%|███▎ | 122300/371472 [9:43:48<19:08:35, 3.62it/s] 33%|███▎ | 122301/371472 [9:43:48<19:59:25, 3.46it/s] 33%|███▎ | 122302/371472 [9:43:49<19:45:46, 3.50it/s] 33%|███▎ | 122303/371472 [9:43:49<19:05:29, 3.63it/s] 33%|███▎ | 122304/371472 [9:43:49<19:36:41, 3.53it/s] 33%|███▎ | 122305/371472 [9:43:49<19:27:54, 3.56it/s] 33%|███▎ | 122306/371472 [9:43:50<19:46:39, 3.50it/s] 33%|███▎ | 122307/371472 [9:43:50<19:48:54, 3.49it/s] 33%|███▎ | 122308/371472 [9:43:50<20:45:28, 3.33it/s] 33%|███▎ | 122309/371472 [9:43:51<19:37:00, 3.53it/s] 33%|███▎ | 122310/371472 [9:43:51<20:35:31, 3.36it/s] 33%|███▎ | 122311/371472 [9:43:51<23:13:20, 2.98it/s] 33%|███▎ | 122312/371472 [9:43:51<21:10:01, 3.27it/s] 33%|███▎ | 122313/371472 [9:43:52<21:18:34, 3.25it/s] 33%|███▎ | 122314/371472 [9:43:52<20:26:59, 3.38it/s] 33%|███▎ | 122315/371472 [9:43:52<20:04:35, 3.45it/s] 33%|███▎ | 122316/371472 [9:43:53<19:14:07, 3.60it/s] 33%|███▎ | 122317/371472 [9:43:53<18:58:25, 3.65it/s] 33%|███▎ | 122318/371472 [9:43:53<20:03:27, 3.45it/s] 33%|███▎ | 122319/371472 [9:43:53<19:53:36, 3.48it/s] 33%|███▎ | 122320/371472 [9:43:54<19:23:29, 3.57it/s] {'loss': 3.1207, 'learning_rate': 7.039690577258721e-07, 'epoch': 5.27} + 33%|███▎ | 122320/371472 [9:43:54<19:23:29, 3.57it/s] 33%|███▎ | 122321/371472 [9:43:54<18:57:06, 3.65it/s] 33%|███▎ | 122322/371472 [9:43:54<19:02:34, 3.63it/s] 33%|███▎ | 122323/371472 [9:43:55<19:19:46, 3.58it/s] 33%|███▎ | 122324/371472 [9:43:55<20:34:47, 3.36it/s] 33%|███▎ | 122325/371472 [9:43:55<20:39:30, 3.35it/s] 33%|███▎ | 122326/371472 [9:43:55<19:55:03, 3.47it/s] 33%|███▎ | 122327/371472 [9:43:56<19:34:46, 3.53it/s] 33%|███▎ | 122328/371472 [9:43:56<19:28:33, 3.55it/s] 33%|███▎ | 122329/371472 [9:43:56<19:03:20, 3.63it/s] 33%|███▎ | 122330/371472 [9:43:57<20:57:14, 3.30it/s] 33%|███▎ | 122331/371472 [9:43:57<20:29:35, 3.38it/s] 33%|███▎ | 122332/371472 [9:43:57<20:21:16, 3.40it/s] 33%|███▎ | 122333/371472 [9:43:57<20:11:54, 3.43it/s] 33%|███▎ | 122334/371472 [9:43:58<20:09:08, 3.43it/s] 33%|███▎ | 122335/371472 [9:43:58<20:02:56, 3.45it/s] 33%|███▎ | 122336/371472 [9:43:58<19:35:21, 3.53it/s] 33%|███▎ | 122337/371472 [9:43:59<19:10:46, 3.61it/s] 33%|███▎ | 122338/371472 [9:43:59<19:45:35, 3.50it/s] 33%|███▎ | 122339/371472 [9:43:59<20:06:29, 3.44it/s] 33%|███▎ | 122340/371472 [9:43:59<19:18:14, 3.58it/s] {'loss': 3.1844, 'learning_rate': 7.039205757503932e-07, 'epoch': 5.27} + 33%|███▎ | 122340/371472 [9:43:59<19:18:14, 3.58it/s] 33%|███▎ | 122341/371472 [9:44:00<19:18:43, 3.58it/s] 33%|███▎ | 122342/371472 [9:44:00<18:58:27, 3.65it/s] 33%|███▎ | 122343/371472 [9:44:00<20:27:53, 3.38it/s] 33%|███▎ | 122344/371472 [9:44:01<20:08:19, 3.44it/s] 33%|███▎ | 122345/371472 [9:44:01<19:46:47, 3.50it/s] 33%|███▎ | 122346/371472 [9:44:01<20:19:35, 3.40it/s] 33%|███▎ | 122347/371472 [9:44:02<21:37:04, 3.20it/s] 33%|███▎ | 122348/371472 [9:44:02<19:57:57, 3.47it/s] 33%|███▎ | 122349/371472 [9:44:02<21:46:34, 3.18it/s] 33%|███▎ | 122350/371472 [9:44:02<21:16:52, 3.25it/s] 33%|���██▎ | 122351/371472 [9:44:03<20:47:13, 3.33it/s] 33%|███▎ | 122352/371472 [9:44:03<20:03:54, 3.45it/s] 33%|███▎ | 122353/371472 [9:44:03<19:05:01, 3.63it/s] 33%|███▎ | 122354/371472 [9:44:04<18:39:33, 3.71it/s] 33%|███▎ | 122355/371472 [9:44:04<20:17:53, 3.41it/s] 33%|███▎ | 122356/371472 [9:44:04<19:10:13, 3.61it/s] 33%|███▎ | 122357/371472 [9:44:04<19:27:51, 3.56it/s] 33%|███▎ | 122358/371472 [9:44:05<20:08:00, 3.44it/s] 33%|███▎ | 122359/371472 [9:44:05<20:14:30, 3.42it/s] 33%|███▎ | 122360/371472 [9:44:05<19:21:46, 3.57it/s] {'loss': 3.1571, 'learning_rate': 7.038720937749143e-07, 'epoch': 5.27} + 33%|███▎ | 122360/371472 [9:44:05<19:21:46, 3.57it/s] 33%|███▎ | 122361/371472 [9:44:06<19:55:38, 3.47it/s] 33%|███▎ | 122362/371472 [9:44:06<19:31:11, 3.54it/s] 33%|███▎ | 122363/371472 [9:44:06<19:32:12, 3.54it/s] 33%|███▎ | 122364/371472 [9:44:06<19:40:00, 3.52it/s] 33%|███▎ | 122365/371472 [9:44:07<18:54:48, 3.66it/s] 33%|███▎ | 122366/371472 [9:44:07<19:51:24, 3.48it/s] 33%|███▎ | 122367/371472 [9:44:07<19:13:53, 3.60it/s] 33%|███▎ | 122368/371472 [9:44:07<18:28:32, 3.75it/s] 33%|███▎ | 122369/371472 [9:44:08<18:05:42, 3.82it/s] 33%|███▎ | 122370/371472 [9:44:08<19:02:57, 3.63it/s] 33%|███▎ | 122371/371472 [9:44:08<21:42:24, 3.19it/s] 33%|███▎ | 122372/371472 [9:44:09<20:46:51, 3.33it/s] 33%|███▎ | 122373/371472 [9:44:09<19:42:29, 3.51it/s] 33%|███▎ | 122374/371472 [9:44:09<19:36:32, 3.53it/s] 33%|███▎ | 122375/371472 [9:44:10<19:30:45, 3.55it/s] 33%|███▎ | 122376/371472 [9:44:10<19:06:55, 3.62it/s] 33%|███▎ | 122377/371472 [9:44:10<19:14:49, 3.59it/s] 33%|███▎ | 122378/371472 [9:44:10<19:05:05, 3.63it/s] 33%|███▎ | 122379/371472 [9:44:11<19:19:50, 3.58it/s] 33%|███▎ | 122380/371472 [9:44:11<18:36:26, 3.72it/s] {'loss': 3.1762, 'learning_rate': 7.038236117994356e-07, 'epoch': 5.27} + 33%|███▎ | 122380/371472 [9:44:11<18:36:26, 3.72it/s] 33%|███▎ | 122381/371472 [9:44:11<18:44:36, 3.69it/s] 33%|███▎ | 122382/371472 [9:44:11<20:13:03, 3.42it/s] 33%|███▎ | 122383/371472 [9:44:12<20:17:20, 3.41it/s] 33%|███▎ | 122384/371472 [9:44:12<20:53:22, 3.31it/s] 33%|███▎ | 122385/371472 [9:44:12<19:31:56, 3.54it/s] 33%|███▎ | 122386/371472 [9:44:13<20:11:34, 3.43it/s] 33%|███▎ | 122387/371472 [9:44:13<18:54:40, 3.66it/s] 33%|███▎ | 122388/371472 [9:44:13<18:36:16, 3.72it/s] 33%|███▎ | 122389/371472 [9:44:13<18:01:54, 3.84it/s] 33%|███▎ | 122390/371472 [9:44:14<20:18:04, 3.41it/s] 33%|███▎ | 122391/371472 [9:44:14<19:58:38, 3.46it/s] 33%|███▎ | 122392/371472 [9:44:14<18:54:25, 3.66it/s] 33%|███▎ | 122393/371472 [9:44:15<20:24:08, 3.39it/s] 33%|███▎ | 122394/371472 [9:44:15<19:19:30, 3.58it/s] 33%|███▎ | 122395/371472 [9:44:15<18:15:31, 3.79it/s] 33%|███▎ | 122396/371472 [9:44:15<18:46:02, 3.69it/s] 33%|███▎ | 122397/371472 [9:44:16<18:40:59, 3.70it/s] 33%|███▎ | 122398/371472 [9:44:16<18:50:36, 3.67it/s] 33%|███▎ | 122399/371472 [9:44:16<19:30:40, 3.55it/s] 33%|███▎ | 122400/371472 [9:44:16<19:16:26, 3.59it/s] {'loss': 3.2898, 'learning_rate': 7.037751298239565e-07, 'epoch': 5.27} + 33%|███▎ | 122400/371472 [9:44:16<19:16:26, 3.59it/s] 33%|███▎ | 122401/371472 [9:44:17<20:38:43, 3.35it/s] 33%|███▎ | 122402/371472 [9:44:17<21:03:16, 3.29it/s] 33%|███▎ | 122403/371472 [9:44:17<20:12:57, 3.42it/s] 33%|███▎ | 122404/371472 [9:44:18<19:37:04, 3.53it/s] 33%|███▎ | 122405/371472 [9:44:18<18:50:08, 3.67it/s] 33%|███▎ | 122406/371472 [9:44:18<19:38:06, 3.52it/s] 33%|███▎ | 122407/371472 [9:44:19<19:41:51, 3.51it/s] 33%|███▎ | 122408/371472 [9:44:19<19:44:49, 3.50it/s] 33%|███▎ | 122409/371472 [9:44:19<18:54:26, 3.66it/s] 33%|███▎ | 122410/371472 [9:44:19<18:32:11, 3.73it/s] 33%|███▎ | 122411/371472 [9:44:20<18:33:46, 3.73it/s] 33%|███▎ | 122412/371472 [9:44:20<19:54:22, 3.48it/s] 33%|███▎ | 122413/371472 [9:44:20<20:04:34, 3.45it/s] 33%|███▎ | 122414/371472 [9:44:21<20:50:58, 3.32it/s] 33%|███▎ | 122415/371472 [9:44:21<20:27:36, 3.38it/s] 33%|███▎ | 122416/371472 [9:44:21<20:36:10, 3.36it/s] 33%|███▎ | 122417/371472 [9:44:21<20:46:24, 3.33it/s] 33%|███▎ | 122418/371472 [9:44:22<19:36:45, 3.53it/s] 33%|███▎ | 122419/371472 [9:44:22<19:15:54, 3.59it/s] 33%|███▎ | 122420/371472 [9:44:22<19:31:55, 3.54it/s] {'loss': 3.4552, 'learning_rate': 7.037266478484776e-07, 'epoch': 5.27} + 33%|███▎ | 122420/371472 [9:44:22<19:31:55, 3.54it/s] 33%|███▎ | 122421/371472 [9:44:22<18:27:23, 3.75it/s] 33%|███▎ | 122422/371472 [9:44:23<18:02:15, 3.84it/s] 33%|███▎ | 122423/371472 [9:44:23<18:17:16, 3.78it/s] 33%|███▎ | 122424/371472 [9:44:23<18:19:59, 3.77it/s] 33%|███▎ | 122425/371472 [9:44:23<18:13:55, 3.79it/s] 33%|███▎ | 122426/371472 [9:44:24<18:19:15, 3.78it/s] 33%|███▎ | 122427/371472 [9:44:24<19:25:57, 3.56it/s] 33%|███▎ | 122428/371472 [9:44:24<19:53:48, 3.48it/s] 33%|███▎ | 122429/371472 [9:44:25<20:21:34, 3.40it/s] 33%|███▎ | 122430/371472 [9:44:25<19:47:03, 3.50it/s] 33%|███▎ | 122431/371472 [9:44:25<19:07:43, 3.62it/s] 33%|███▎ | 122432/371472 [9:44:26<19:39:02, 3.52it/s] 33%|███▎ | 122433/371472 [9:44:26<19:18:03, 3.58it/s] 33%|███▎ | 122434/371472 [9:44:26<18:47:18, 3.68it/s] 33%|███▎ | 122435/371472 [9:44:26<18:54:05, 3.66it/s] 33%|███▎ | 122436/371472 [9:44:27<18:17:46, 3.78it/s] 33%|███▎ | 122437/371472 [9:44:27<17:57:30, 3.85it/s] 33%|███▎ | 122438/371472 [9:44:27<18:20:54, 3.77it/s] 33%|███▎ | 122439/371472 [9:44:27<18:18:35, 3.78it/s] 33%|███▎ | 122440/371472 [9:44:28<18:13:06, 3.80it/s] {'loss': 3.274, 'learning_rate': 7.036781658729987e-07, 'epoch': 5.27} + 33%|███▎ | 122440/371472 [9:44:28<18:13:06, 3.80it/s] 33%|███▎ | 122441/371472 [9:44:28<18:23:02, 3.76it/s] 33%|███▎ | 122442/371472 [9:44:28<20:48:34, 3.32it/s] 33%|███▎ | 122443/371472 [9:44:29<19:42:56, 3.51it/s] 33%|███▎ | 122444/371472 [9:44:29<19:07:26, 3.62it/s] 33%|███▎ | 122445/371472 [9:44:29<20:14:39, 3.42it/s] 33%|███▎ | 122446/371472 [9:44:29<19:18:16, 3.58it/s] 33%|███▎ | 122447/371472 [9:44:30<18:41:29, 3.70it/s] 33%|███▎ | 122448/371472 [9:44:30<18:33:43, 3.73it/s] 33%|███▎ | 122449/371472 [9:44:30<18:03:07, 3.83it/s] 33%|███▎ | 122450/371472 [9:44:30<19:42:12, 3.51it/s] 33%|███▎ | 122451/371472 [9:44:31<18:45:43, 3.69it/s] 33%|███▎ | 122452/371472 [9:44:31<18:47:36, 3.68it/s] 33%|███▎ | 122453/371472 [9:44:31<19:08:45, 3.61it/s] 33%|███▎ | 122454/371472 [9:44:32<19:16:17, 3.59it/s] 33%|███▎ | 122455/371472 [9:44:32<18:17:23, 3.78it/s] 33%|███▎ | 122456/371472 [9:44:32<19:05:56, 3.62it/s] 33%|███▎ | 122457/371472 [9:44:32<21:13:32, 3.26it/s] 33%|███▎ | 122458/371472 [9:44:33<20:23:56, 3.39it/s] 33%|███▎ | 122459/371472 [9:44:33<19:14:12, 3.60it/s] 33%|███▎ | 122460/371472 [9:44:33<18:13:06, 3.80it/s] {'loss': 3.2178, 'learning_rate': 7.036296838975198e-07, 'epoch': 5.27} + 33%|███▎ | 122460/371472 [9:44:33<18:13:06, 3.80it/s] 33%|███▎ | 122461/371472 [9:44:33<18:48:54, 3.68it/s] 33%|███▎ | 122462/371472 [9:44:34<19:33:07, 3.54it/s] 33%|███▎ | 122463/371472 [9:44:34<19:36:37, 3.53it/s] 33%|███▎ | 122464/371472 [9:44:34<20:05:26, 3.44it/s] 33%|███▎ | 122465/371472 [9:44:35<20:26:57, 3.38it/s] 33%|███▎ | 122466/371472 [9:44:35<20:14:29, 3.42it/s] 33%|███▎ | 122467/371472 [9:44:35<20:15:24, 3.41it/s] 33%|███▎ | 122468/371472 [9:44:36<21:48:47, 3.17it/s] 33%|███▎ | 122469/371472 [9:44:36<21:18:10, 3.25it/s] 33%|███▎ | 122470/371472 [9:44:36<20:59:13, 3.30it/s] 33%|███▎ | 122471/371472 [9:44:36<19:55:15, 3.47it/s] 33%|███▎ | 122472/371472 [9:44:37<19:35:18, 3.53it/s] 33%|███▎ | 122473/371472 [9:44:37<18:44:36, 3.69it/s] 33%|███▎ | 122474/371472 [9:44:37<18:21:02, 3.77it/s] 33%|███▎ | 122475/371472 [9:44:37<18:19:25, 3.77it/s] 33%|███▎ | 122476/371472 [9:44:38<17:59:17, 3.85it/s] 33%|███▎ | 122477/371472 [9:44:38<18:44:34, 3.69it/s] 33%|███▎ | 122478/371472 [9:44:38<19:49:40, 3.49it/s] 33%|███▎ | 122479/371472 [9:44:39<19:09:58, 3.61it/s] 33%|███▎ | 122480/371472 [9:44:39<18:26:27, 3.75it/s] {'loss': 3.0688, 'learning_rate': 7.03581201922041e-07, 'epoch': 5.28} + 33%|███▎ | 122480/371472 [9:44:39<18:26:27, 3.75it/s] 33%|███▎ | 122481/371472 [9:44:39<19:32:16, 3.54it/s] 33%|███▎ | 122482/371472 [9:44:39<18:48:03, 3.68it/s] 33%|███▎ | 122483/371472 [9:44:40<18:04:29, 3.83it/s] 33%|███▎ | 122484/371472 [9:44:40<18:39:19, 3.71it/s] 33%|███▎ | 122485/371472 [9:44:40<19:33:36, 3.54it/s] 33%|███▎ | 122486/371472 [9:44:41<20:04:12, 3.45it/s] 33%|███▎ | 122487/371472 [9:44:41<19:23:55, 3.57it/s] 33%|███▎ | 122488/371472 [9:44:41<19:46:10, 3.50it/s] 33%|███▎ | 122489/371472 [9:44:41<18:56:13, 3.65it/s] 33%|███▎ | 122490/371472 [9:44:42<21:36:45, 3.20it/s] 33%|███▎ | 122491/371472 [9:44:42<21:43:12, 3.18it/s] 33%|███▎ | 122492/371472 [9:44:42<21:48:18, 3.17it/s] 33%|███▎ | 122493/371472 [9:44:43<20:54:54, 3.31it/s] 33%|███▎ | 122494/371472 [9:44:43<20:09:09, 3.43it/s] 33%|███▎ | 122495/371472 [9:44:43<19:57:01, 3.47it/s] 33%|███▎ | 122496/371472 [9:44:43<19:04:54, 3.62it/s] 33%|███▎ | 122497/371472 [9:44:44<19:54:38, 3.47it/s] 33%|███▎ | 122498/371472 [9:44:44<22:47:11, 3.04it/s] 33%|███▎ | 122499/371472 [9:44:45<21:57:31, 3.15it/s] 33%|███▎ | 122500/371472 [9:44:45<20:34:23, 3.36it/s] {'loss': 3.1846, 'learning_rate': 7.035327199465621e-07, 'epoch': 5.28} + 33%|███▎ | 122500/371472 [9:44:45<20:34:23, 3.36it/s] 33%|███▎ | 122501/371472 [9:44:45<21:06:26, 3.28it/s] 33%|███▎ | 122502/371472 [9:44:45<20:00:04, 3.46it/s] 33%|███▎ | 122503/371472 [9:44:46<19:26:36, 3.56it/s] 33%|███▎ | 122504/371472 [9:44:46<19:17:23, 3.59it/s] 33%|███▎ | 122505/371472 [9:44:46<18:37:07, 3.71it/s] 33%|███▎ | 122506/371472 [9:44:46<18:21:54, 3.77it/s] 33%|███▎ | 122507/371472 [9:44:47<18:28:42, 3.74it/s] 33%|███▎ | 122508/371472 [9:44:47<21:19:56, 3.24it/s] 33%|███▎ | 122509/371472 [9:44:47<20:13:30, 3.42it/s] 33%|███▎ | 122510/371472 [9:44:48<20:04:54, 3.44it/s] 33%|███▎ | 122511/371472 [9:44:48<19:23:09, 3.57it/s] 33%|███▎ | 122512/371472 [9:44:48<19:21:06, 3.57it/s] 33%|███▎ | 122513/371472 [9:44:48<18:54:05, 3.66it/s] 33%|███▎ | 122514/371472 [9:44:49<21:40:33, 3.19it/s] 33%|███▎ | 122515/371472 [9:44:49<22:34:22, 3.06it/s] 33%|███▎ | 122516/371472 [9:44:49<22:42:35, 3.05it/s] 33%|███▎ | 122517/371472 [9:44:50<21:55:47, 3.15it/s] 33%|███▎ | 122518/371472 [9:44:50<21:19:29, 3.24it/s] 33%|███▎ | 122519/371472 [9:44:50<21:37:34, 3.20it/s] 33%|███▎ | 122520/371472 [9:44:51<21:19:38, 3.24it/s] {'loss': 3.2659, 'learning_rate': 7.034842379710832e-07, 'epoch': 5.28} + 33%|███▎ | 122520/371472 [9:44:51<21:19:38, 3.24it/s] 33%|███▎ | 122521/371472 [9:44:51<20:25:04, 3.39it/s] 33%|███▎ | 122522/371472 [9:44:51<20:55:46, 3.30it/s] 33%|███▎ | 122523/371472 [9:44:52<21:42:45, 3.18it/s] 33%|███▎ | 122524/371472 [9:44:52<22:27:08, 3.08it/s] 33%|███▎ | 122525/371472 [9:44:52<21:00:45, 3.29it/s] 33%|███▎ | 122526/371472 [9:44:53<22:56:41, 3.01it/s] 33%|███▎ | 122527/371472 [9:44:53<21:20:37, 3.24it/s] 33%|███▎ | 122528/371472 [9:44:53<21:14:25, 3.26it/s] 33%|███▎ | 122529/371472 [9:44:53<21:28:32, 3.22it/s] 33%|███▎ | 122530/371472 [9:44:54<20:42:08, 3.34it/s] 33%|███▎ | 122531/371472 [9:44:54<20:15:20, 3.41it/s] 33%|███▎ | 122532/371472 [9:44:54<20:12:52, 3.42it/s] 33%|███▎ | 122533/371472 [9:44:55<21:10:58, 3.26it/s] 33%|███▎ | 122534/371472 [9:44:55<20:36:46, 3.35it/s] 33%|███▎ | 122535/371472 [9:44:55<20:02:16, 3.45it/s] 33%|███▎ | 122536/371472 [9:44:56<20:05:20, 3.44it/s] 33%|███▎ | 122537/371472 [9:44:56<19:47:36, 3.49it/s] 33%|███▎ | 122538/371472 [9:44:56<18:59:09, 3.64it/s] 33%|███▎ | 122539/371472 [9:44:56<19:27:34, 3.55it/s] 33%|███▎ | 122540/371472 [9:44:57<18:23:07, 3.76it/s] {'loss': 3.2669, 'learning_rate': 7.034357559956042e-07, 'epoch': 5.28} + 33%|███▎ | 122540/371472 [9:44:57<18:23:07, 3.76it/s] 33%|███▎ | 122541/371472 [9:44:57<17:51:45, 3.87it/s] 33%|███▎ | 122542/371472 [9:44:57<17:19:10, 3.99it/s] 33%|███▎ | 122543/371472 [9:44:57<17:41:22, 3.91it/s] 33%|███▎ | 122544/371472 [9:44:58<18:52:33, 3.66it/s] 33%|███▎ | 122545/371472 [9:44:58<18:20:27, 3.77it/s] 33%|███▎ | 122546/371472 [9:44:58<18:06:52, 3.82it/s] 33%|███▎ | 122547/371472 [9:44:58<18:34:14, 3.72it/s] 33%|███▎ | 122548/371472 [9:44:59<19:22:43, 3.57it/s] 33%|███▎ | 122549/371472 [9:44:59<19:05:06, 3.62it/s] 33%|███▎ | 122550/371472 [9:44:59<18:13:15, 3.79it/s] 33%|███▎ | 122551/371472 [9:44:59<18:04:05, 3.83it/s] 33%|███▎ | 122552/371472 [9:45:00<18:34:17, 3.72it/s] 33%|███▎ | 122553/371472 [9:45:00<18:50:33, 3.67it/s] 33%|███▎ | 122554/371472 [9:45:00<18:44:35, 3.69it/s] 33%|███▎ | 122555/371472 [9:45:01<19:55:06, 3.47it/s] 33%|███▎ | 122556/371472 [9:45:01<19:53:56, 3.47it/s] 33%|███▎ | 122557/371472 [9:45:01<21:44:31, 3.18it/s] 33%|███▎ | 122558/371472 [9:45:02<20:34:35, 3.36it/s] 33%|███▎ | 122559/371472 [9:45:02<20:34:45, 3.36it/s] 33%|███▎ | 122560/371472 [9:45:02<19:27:54, 3.55it/s] {'loss': 3.0736, 'learning_rate': 7.033872740201254e-07, 'epoch': 5.28} + 33%|███▎ | 122560/371472 [9:45:02<19:27:54, 3.55it/s] 33%|███▎ | 122561/371472 [9:45:02<19:04:44, 3.62it/s] 33%|███▎ | 122562/371472 [9:45:03<18:48:33, 3.68it/s] 33%|███▎ | 122563/371472 [9:45:03<18:23:40, 3.76it/s] 33%|███▎ | 122564/371472 [9:45:03<18:26:02, 3.75it/s] 33%|███▎ | 122565/371472 [9:45:03<19:39:14, 3.52it/s] 33%|███▎ | 122566/371472 [9:45:04<19:36:52, 3.52it/s] 33%|███▎ | 122567/371472 [9:45:04<18:49:27, 3.67it/s] 33%|███▎ | 122568/371472 [9:45:04<18:32:47, 3.73it/s] 33%|███▎ | 122569/371472 [9:45:05<18:43:11, 3.69it/s] 33%|███▎ | 122570/371472 [9:45:05<18:18:54, 3.77it/s] 33%|███▎ | 122571/371472 [9:45:05<19:29:38, 3.55it/s] 33%|███▎ | 122572/371472 [9:45:05<19:23:50, 3.56it/s] 33%|███▎ | 122573/371472 [9:45:06<19:32:59, 3.54it/s] 33%|███▎ | 122574/371472 [9:45:06<18:45:25, 3.69it/s] 33%|███▎ | 122575/371472 [9:45:06<19:03:37, 3.63it/s] 33%|███▎ | 122576/371472 [9:45:06<19:21:55, 3.57it/s] 33%|███▎ | 122577/371472 [9:45:07<19:18:08, 3.58it/s] 33%|███▎ | 122578/371472 [9:45:07<19:24:27, 3.56it/s] 33%|███▎ | 122579/371472 [9:45:07<19:35:41, 3.53it/s] 33%|███▎ | 122580/371472 [9:45:08<19:07:25, 3.62it/s] {'loss': 3.2083, 'learning_rate': 7.033387920446465e-07, 'epoch': 5.28} + 33%|███▎ | 122580/371472 [9:45:08<19:07:25, 3.62it/s] 33%|███▎ | 122581/371472 [9:45:08<18:56:40, 3.65it/s] 33%|███▎ | 122582/371472 [9:45:08<19:36:13, 3.53it/s] 33%|███▎ | 122583/371472 [9:45:08<19:06:07, 3.62it/s] 33%|███▎ | 122584/371472 [9:45:09<19:29:31, 3.55it/s] 33%|███▎ | 122585/371472 [9:45:09<19:04:55, 3.62it/s] 33%|███▎ | 122586/371472 [9:45:09<20:44:19, 3.33it/s] 33%|███▎ | 122587/371472 [9:45:10<19:51:16, 3.48it/s] 33%|███▎ | 122588/371472 [9:45:10<19:03:31, 3.63it/s] 33%|███▎ | 122589/371472 [9:45:10<19:46:38, 3.50it/s] 33%|███▎ | 122590/371472 [9:45:10<18:50:25, 3.67it/s] 33%|███▎ | 122591/371472 [9:45:11<19:41:37, 3.51it/s] 33%|███▎ | 122592/371472 [9:45:11<18:50:01, 3.67it/s] 33%|███▎ | 122593/371472 [9:45:11<19:10:18, 3.61it/s] 33%|███▎ | 122594/371472 [9:45:12<18:48:00, 3.68it/s] 33%|███▎ | 122595/371472 [9:45:12<20:06:17, 3.44it/s] 33%|███▎ | 122596/371472 [9:45:12<19:31:50, 3.54it/s] 33%|███▎ | 122597/371472 [9:45:12<20:16:58, 3.41it/s] 33%|███▎ | 122598/371472 [9:45:13<20:08:34, 3.43it/s] 33%|███▎ | 122599/371472 [9:45:13<19:49:12, 3.49it/s] 33%|███▎ | 122600/371472 [9:45:13<19:07:05, 3.62it/s] {'loss': 3.1391, 'learning_rate': 7.032903100691676e-07, 'epoch': 5.28} + 33%|███▎ | 122600/371472 [9:45:13<19:07:05, 3.62it/s] 33%|███▎ | 122601/371472 [9:45:13<18:38:48, 3.71it/s] 33%|███▎ | 122602/371472 [9:45:14<18:43:04, 3.69it/s] 33%|███▎ | 122603/371472 [9:45:14<18:29:14, 3.74it/s] 33%|███▎ | 122604/371472 [9:45:14<18:09:29, 3.81it/s] 33%|███▎ | 122605/371472 [9:45:15<18:21:59, 3.76it/s] 33%|███▎ | 122606/371472 [9:45:15<18:02:14, 3.83it/s] 33%|███▎ | 122607/371472 [9:45:15<18:07:05, 3.82it/s] 33%|███▎ | 122608/371472 [9:45:15<18:22:03, 3.76it/s] 33%|███▎ | 122609/371472 [9:45:16<18:19:50, 3.77it/s] 33%|███▎ | 122610/371472 [9:45:16<18:51:09, 3.67it/s] 33%|███▎ | 122611/371472 [9:45:16<18:45:32, 3.69it/s] 33%|███▎ | 122612/371472 [9:45:16<18:23:20, 3.76it/s] 33%|███▎ | 122613/371472 [9:45:17<18:33:22, 3.73it/s] 33%|███▎ | 122614/371472 [9:45:17<18:45:42, 3.68it/s] 33%|███▎ | 122615/371472 [9:45:17<20:49:42, 3.32it/s] 33%|███▎ | 122616/371472 [9:45:18<20:54:38, 3.31it/s] 33%|███▎ | 122617/371472 [9:45:18<20:14:21, 3.42it/s] 33%|███▎ | 122618/371472 [9:45:18<20:44:27, 3.33it/s] 33%|███▎ | 122619/371472 [9:45:19<20:17:40, 3.41it/s] 33%|███▎ | 122620/371472 [9:45:19<19:47:56, 3.49it/s] {'loss': 3.4247, 'learning_rate': 7.032418280936887e-07, 'epoch': 5.28} + 33%|███▎ | 122620/371472 [9:45:19<19:47:56, 3.49it/s] 33%|███▎ | 122621/371472 [9:45:19<19:21:33, 3.57it/s] 33%|███▎ | 122622/371472 [9:45:19<19:09:17, 3.61it/s] 33%|███▎ | 122623/371472 [9:45:20<18:56:09, 3.65it/s] 33%|███▎ | 122624/371472 [9:45:20<19:20:16, 3.57it/s] 33%|███▎ | 122625/371472 [9:45:20<20:29:37, 3.37it/s] 33%|███▎ | 122626/371472 [9:45:20<19:30:43, 3.54it/s] 33%|███▎ | 122627/371472 [9:45:21<20:33:21, 3.36it/s] 33%|███▎ | 122628/371472 [9:45:21<19:55:07, 3.47it/s] 33%|███▎ | 122629/371472 [9:45:21<19:48:50, 3.49it/s] 33%|███▎ | 122630/371472 [9:45:22<21:05:54, 3.28it/s] 33%|███▎ | 122631/371472 [9:45:22<21:04:27, 3.28it/s] 33%|███▎ | 122632/371472 [9:45:22<21:46:30, 3.17it/s] 33%|███▎ | 122633/371472 [9:45:23<20:11:20, 3.42it/s] 33%|███▎ | 122634/371472 [9:45:23<19:59:36, 3.46it/s] 33%|███▎ | 122635/371472 [9:45:23<19:13:01, 3.60it/s] 33%|███▎ | 122636/371472 [9:45:23<19:43:49, 3.50it/s] 33%|███▎ | 122637/371472 [9:45:24<19:25:30, 3.56it/s] 33%|███▎ | 122638/371472 [9:45:24<18:24:08, 3.76it/s] 33%|███▎ | 122639/371472 [9:45:24<19:26:35, 3.55it/s] 33%|███▎ | 122640/371472 [9:45:24<18:40:29, 3.70it/s] {'loss': 3.1499, 'learning_rate': 7.031933461182099e-07, 'epoch': 5.28} + 33%|███▎ | 122640/371472 [9:45:24<18:40:29, 3.70it/s] 33%|███▎ | 122641/371472 [9:45:25<19:45:02, 3.50it/s] 33%|███▎ | 122642/371472 [9:45:25<19:31:08, 3.54it/s] 33%|███▎ | 122643/371472 [9:45:25<19:05:34, 3.62it/s] 33%|███▎ | 122644/371472 [9:45:26<18:44:13, 3.69it/s] 33%|███▎ | 122645/371472 [9:45:26<18:03:44, 3.83it/s] 33%|███▎ | 122646/371472 [9:45:26<18:23:44, 3.76it/s] 33%|███▎ | 122647/371472 [9:45:27<22:42:30, 3.04it/s] 33%|███▎ | 122648/371472 [9:45:27<21:02:55, 3.28it/s] 33%|███▎ | 122649/371472 [9:45:27<20:15:48, 3.41it/s] 33%|███▎ | 122650/371472 [9:45:27<19:53:11, 3.48it/s] 33%|███▎ | 122651/371472 [9:45:28<19:22:46, 3.57it/s] 33%|███▎ | 122652/371472 [9:45:28<19:30:22, 3.54it/s] 33%|███▎ | 122653/371472 [9:45:28<18:42:25, 3.69it/s] 33%|███▎ | 122654/371472 [9:45:28<18:36:12, 3.72it/s] 33%|███▎ | 122655/371472 [9:45:29<18:49:13, 3.67it/s] 33%|███▎ | 122656/371472 [9:45:29<18:41:15, 3.70it/s] 33%|███▎ | 122657/371472 [9:45:29<18:49:32, 3.67it/s] 33%|███▎ | 122658/371472 [9:45:29<18:06:27, 3.82it/s] 33%|███▎ | 122659/371472 [9:45:30<17:55:38, 3.86it/s] 33%|███▎ | 122660/371472 [9:45:30<20:34:54, 3.36it/s] {'loss': 3.2581, 'learning_rate': 7.031448641427309e-07, 'epoch': 5.28} + 33%|███▎ | 122660/371472 [9:45:30<20:34:54, 3.36it/s] 33%|███▎ | 122661/371472 [9:45:30<20:20:40, 3.40it/s] 33%|███▎ | 122662/371472 [9:45:31<21:20:32, 3.24it/s] 33%|███▎ | 122663/371472 [9:45:31<20:17:14, 3.41it/s] 33%|███▎ | 122664/371472 [9:45:31<20:09:12, 3.43it/s] 33%|███▎ | 122665/371472 [9:45:32<19:35:35, 3.53it/s] 33%|███▎ | 122666/371472 [9:45:32<20:54:18, 3.31it/s] 33%|███▎ | 122667/371472 [9:45:32<20:03:46, 3.44it/s] 33%|███▎ | 122668/371472 [9:45:32<19:11:00, 3.60it/s] 33%|███▎ | 122669/371472 [9:45:33<21:23:11, 3.23it/s] 33%|███▎ | 122670/371472 [9:45:33<20:57:37, 3.30it/s] 33%|███▎ | 122671/371472 [9:45:33<19:45:44, 3.50it/s] 33%|███▎ | 122672/371472 [9:45:34<19:43:18, 3.50it/s] 33%|███▎ | 122673/371472 [9:45:34<19:44:33, 3.50it/s] 33%|███▎ | 122674/371472 [9:45:34<20:05:51, 3.44it/s] 33%|███▎ | 122675/371472 [9:45:34<19:23:03, 3.57it/s] 33%|███▎ | 122676/371472 [9:45:35<19:17:36, 3.58it/s] 33%|███▎ | 122677/371472 [9:45:35<18:33:52, 3.72it/s] 33%|███▎ | 122678/371472 [9:45:35<18:34:14, 3.72it/s] 33%|███▎ | 122679/371472 [9:45:36<18:31:24, 3.73it/s] 33%|███▎ | 122680/371472 [9:45:36<17:55:45, 3.85it/s] {'loss': 3.0979, 'learning_rate': 7.030963821672519e-07, 'epoch': 5.28} + 33%|███▎ | 122680/371472 [9:45:36<17:55:45, 3.85it/s] 33%|███▎ | 122681/371472 [9:45:36<19:03:51, 3.63it/s] 33%|███▎ | 122682/371472 [9:45:36<18:56:16, 3.65it/s] 33%|███▎ | 122683/371472 [9:45:37<18:18:38, 3.77it/s] 33%|███▎ | 122684/371472 [9:45:37<18:58:20, 3.64it/s] 33%|███▎ | 122685/371472 [9:45:37<18:59:06, 3.64it/s] 33%|███▎ | 122686/371472 [9:45:37<19:20:34, 3.57it/s] 33%|███▎ | 122687/371472 [9:45:38<20:05:36, 3.44it/s] 33%|███▎ | 122688/371472 [9:45:38<19:21:42, 3.57it/s] 33%|███▎ | 122689/371472 [9:45:38<19:15:50, 3.59it/s] 33%|███▎ | 122690/371472 [9:45:39<19:09:00, 3.61it/s] 33%|███▎ | 122691/371472 [9:45:39<18:16:33, 3.78it/s] 33%|███▎ | 122692/371472 [9:45:39<21:15:47, 3.25it/s] 33%|███▎ | 122693/371472 [9:45:39<20:04:40, 3.44it/s] 33%|███▎ | 122694/371472 [9:45:40<19:33:52, 3.53it/s] 33%|███▎ | 122695/371472 [9:45:40<18:57:04, 3.65it/s] 33%|███▎ | 122696/371472 [9:45:40<18:56:04, 3.65it/s] 33%|███▎ | 122697/371472 [9:45:41<19:08:14, 3.61it/s] 33%|███▎ | 122698/371472 [9:45:41<19:13:09, 3.60it/s] 33%|███▎ | 122699/371472 [9:45:41<19:58:25, 3.46it/s] 33%|███▎ | 122700/371472 [9:45:41<18:59:13, 3.64it/s] {'loss': 3.0472, 'learning_rate': 7.030479001917731e-07, 'epoch': 5.28} + 33%|███▎ | 122700/371472 [9:45:41<18:59:13, 3.64it/s] 33%|███▎ | 122701/371472 [9:45:42<18:51:35, 3.66it/s] 33%|███▎ | 122702/371472 [9:45:42<18:26:54, 3.75it/s] 33%|███▎ | 122703/371472 [9:45:42<19:53:21, 3.47it/s] 33%|███▎ | 122704/371472 [9:45:43<19:58:13, 3.46it/s] 33%|███▎ | 122705/371472 [9:45:43<19:40:15, 3.51it/s] 33%|███▎ | 122706/371472 [9:45:43<19:41:15, 3.51it/s] 33%|███▎ | 122707/371472 [9:45:43<19:44:45, 3.50it/s] 33%|███▎ | 122708/371472 [9:45:44<21:31:16, 3.21it/s] 33%|███▎ | 122709/371472 [9:45:44<20:09:38, 3.43it/s] 33%|███▎ | 122710/371472 [9:45:44<19:51:39, 3.48it/s] 33%|███▎ | 122711/371472 [9:45:45<19:02:10, 3.63it/s] 33%|███▎ | 122712/371472 [9:45:45<19:27:38, 3.55it/s] 33%|███▎ | 122713/371472 [9:45:45<18:49:41, 3.67it/s] 33%|███▎ | 122714/371472 [9:45:45<18:28:28, 3.74it/s] 33%|███▎ | 122715/371472 [9:45:46<18:27:59, 3.74it/s] 33%|███▎ | 122716/371472 [9:45:46<23:38:29, 2.92it/s] 33%|███▎ | 122717/371472 [9:45:46<22:11:58, 3.11it/s] 33%|███▎ | 122718/371472 [9:45:47<21:18:56, 3.24it/s] 33%|███▎ | 122719/371472 [9:45:47<20:38:20, 3.35it/s] 33%|███▎ | 122720/371472 [9:45:47<22:07:00, 3.12it/s] {'loss': 3.2276, 'learning_rate': 7.029994182162943e-07, 'epoch': 5.29} + 33%|███▎ | 122720/371472 [9:45:47<22:07:00, 3.12it/s] 33%|███▎ | 122721/371472 [9:45:48<21:09:47, 3.26it/s] 33%|███▎ | 122722/371472 [9:45:48<21:12:42, 3.26it/s] 33%|███▎ | 122723/371472 [9:45:48<19:50:51, 3.48it/s] 33%|███▎ | 122724/371472 [9:45:48<19:49:28, 3.49it/s] 33%|███▎ | 122725/371472 [9:45:49<19:45:03, 3.50it/s] 33%|███▎ | 122726/371472 [9:45:49<19:01:52, 3.63it/s] 33%|███▎ | 122727/371472 [9:45:49<20:13:37, 3.42it/s] 33%|███▎ | 122728/371472 [9:45:50<20:09:45, 3.43it/s] 33%|███▎ | 122729/371472 [9:45:50<21:00:10, 3.29it/s] 33%|███▎ | 122730/371472 [9:45:50<20:51:30, 3.31it/s] 33%|███▎ | 122731/371472 [9:45:51<21:22:29, 3.23it/s] 33%|███▎ | 122732/371472 [9:45:51<19:50:53, 3.48it/s] 33%|███▎ | 122733/371472 [9:45:51<19:26:42, 3.55it/s] 33%|███▎ | 122734/371472 [9:45:51<18:32:08, 3.73it/s] 33%|███▎ | 122735/371472 [9:45:52<18:07:55, 3.81it/s] 33%|███▎ | 122736/371472 [9:45:52<18:01:24, 3.83it/s] 33%|███▎ | 122737/371472 [9:45:52<17:56:25, 3.85it/s] 33%|███▎ | 122738/371472 [9:45:52<19:12:40, 3.60it/s] 33%|███▎ | 122739/371472 [9:45:53<19:19:57, 3.57it/s] 33%|███▎ | 122740/371472 [9:45:53<19:38:30, 3.52it/s] {'loss': 3.2683, 'learning_rate': 7.029509362408154e-07, 'epoch': 5.29} + 33%|███▎ | 122740/371472 [9:45:53<19:38:30, 3.52it/s] 33%|███▎ | 122741/371472 [9:45:53<19:33:42, 3.53it/s] 33%|███▎ | 122742/371472 [9:45:54<21:25:12, 3.23it/s] 33%|███▎ | 122743/371472 [9:45:54<20:03:37, 3.44it/s] 33%|███▎ | 122744/371472 [9:45:54<19:29:22, 3.55it/s] 33%|███▎ | 122745/371472 [9:45:54<18:43:07, 3.69it/s] 33%|███▎ | 122746/371472 [9:45:55<18:10:15, 3.80it/s] 33%|███▎ | 122747/371472 [9:45:55<20:39:22, 3.34it/s] 33%|███▎ | 122748/371472 [9:45:55<19:19:33, 3.57it/s] 33%|███▎ | 122749/371472 [9:45:55<18:15:20, 3.78it/s] 33%|███▎ | 122750/371472 [9:45:56<18:34:24, 3.72it/s] 33%|███▎ | 122751/371472 [9:45:56<19:09:27, 3.61it/s] 33%|███▎ | 122752/371472 [9:45:56<19:37:06, 3.52it/s] 33%|███▎ | 122753/371472 [9:45:57<18:57:36, 3.64it/s] 33%|███▎ | 122754/371472 [9:45:57<18:30:11, 3.73it/s] 33%|███▎ | 122755/371472 [9:45:57<19:15:33, 3.59it/s] 33%|███▎ | 122756/371472 [9:45:57<18:32:39, 3.73it/s] 33%|███▎ | 122757/371472 [9:45:58<20:00:29, 3.45it/s] 33%|███▎ | 122758/371472 [9:45:58<20:18:33, 3.40it/s] 33%|███▎ | 122759/371472 [9:45:58<20:40:03, 3.34it/s] 33%|███▎ | 122760/371472 [9:45:59<19:37:22, 3.52it/s] {'loss': 3.1784, 'learning_rate': 7.029024542653364e-07, 'epoch': 5.29} + 33%|███▎ | 122760/371472 [9:45:59<19:37:22, 3.52it/s] 33%|███▎ | 122761/371472 [9:45:59<20:23:17, 3.39it/s] 33%|███▎ | 122762/371472 [9:45:59<19:21:27, 3.57it/s] 33%|███▎ | 122763/371472 [9:45:59<20:08:13, 3.43it/s] 33%|███▎ | 122764/371472 [9:46:00<20:03:54, 3.44it/s] 33%|███▎ | 122765/371472 [9:46:00<19:55:32, 3.47it/s] 33%|███▎ | 122766/371472 [9:46:00<21:50:42, 3.16it/s] 33%|███▎ | 122767/371472 [9:46:01<21:00:34, 3.29it/s] 33%|███▎ | 122768/371472 [9:46:01<20:43:37, 3.33it/s] 33%|███▎ | 122769/371472 [9:46:01<20:42:07, 3.34it/s] 33%|███▎ | 122770/371472 [9:46:02<20:19:01, 3.40it/s] 33%|███▎ | 122771/371472 [9:46:02<19:19:40, 3.57it/s] 33%|███▎ | 122772/371472 [9:46:02<18:46:27, 3.68it/s] 33%|███▎ | 122773/371472 [9:46:02<18:59:32, 3.64it/s] 33%|███▎ | 122774/371472 [9:46:03<18:34:24, 3.72it/s] 33%|███▎ | 122775/371472 [9:46:03<19:53:24, 3.47it/s] 33%|███▎ | 122776/371472 [9:46:03<19:30:31, 3.54it/s] 33%|███▎ | 122777/371472 [9:46:03<19:24:32, 3.56it/s] 33%|███▎ | 122778/371472 [9:46:04<19:27:22, 3.55it/s] 33%|███▎ | 122779/371472 [9:46:04<19:57:27, 3.46it/s] 33%|███▎ | 122780/371472 [9:46:04<19:43:38, 3.50it/s] {'loss': 3.3002, 'learning_rate': 7.028539722898576e-07, 'epoch': 5.29} + 33%|███▎ | 122780/371472 [9:46:04<19:43:38, 3.50it/s] 33%|███▎ | 122781/371472 [9:46:05<19:40:40, 3.51it/s] 33%|███▎ | 122782/371472 [9:46:05<20:14:26, 3.41it/s] 33%|███▎ | 122783/371472 [9:46:05<20:25:48, 3.38it/s] 33%|███▎ | 122784/371472 [9:46:05<19:14:06, 3.59it/s] 33%|███▎ | 122785/371472 [9:46:06<18:48:23, 3.67it/s] 33%|███▎ | 122786/371472 [9:46:06<18:20:16, 3.77it/s] 33%|███▎ | 122787/371472 [9:46:06<18:30:29, 3.73it/s] 33%|███▎ | 122788/371472 [9:46:07<18:17:50, 3.78it/s] 33%|███▎ | 122789/371472 [9:46:07<18:04:30, 3.82it/s] 33%|███▎ | 122790/371472 [9:46:07<18:39:16, 3.70it/s] 33%|███▎ | 122791/371472 [9:46:07<18:30:15, 3.73it/s] 33%|███▎ | 122792/371472 [9:46:08<18:01:55, 3.83it/s] 33%|███▎ | 122793/371472 [9:46:08<18:14:39, 3.79it/s] 33%|███▎ | 122794/371472 [9:46:08<18:19:22, 3.77it/s] 33%|███▎ | 122795/371472 [9:46:08<18:18:37, 3.77it/s] 33%|███▎ | 122796/371472 [9:46:09<18:40:43, 3.70it/s] 33%|███▎ | 122797/371472 [9:46:09<19:26:33, 3.55it/s] 33%|███▎ | 122798/371472 [9:46:09<19:04:26, 3.62it/s] 33%|███▎ | 122799/371472 [9:46:09<18:24:16, 3.75it/s] 33%|███▎ | 122800/371472 [9:46:10<19:10:08, 3.60it/s] {'loss': 3.1269, 'learning_rate': 7.028054903143786e-07, 'epoch': 5.29} + 33%|███▎ | 122800/371472 [9:46:10<19:10:08, 3.60it/s] 33%|███▎ | 122801/371472 [9:46:10<19:44:15, 3.50it/s] 33%|███▎ | 122802/371472 [9:46:10<19:47:35, 3.49it/s] 33%|███▎ | 122803/371472 [9:46:11<18:56:04, 3.65it/s] 33%|███▎ | 122804/371472 [9:46:11<19:48:38, 3.49it/s] 33%|███▎ | 122805/371472 [9:46:11<18:53:27, 3.66it/s] 33%|███▎ | 122806/371472 [9:46:11<18:35:34, 3.72it/s] 33%|███▎ | 122807/371472 [9:46:12<18:55:41, 3.65it/s] 33%|███▎ | 122808/371472 [9:46:12<19:14:05, 3.59it/s] 33%|███▎ | 122809/371472 [9:46:12<19:09:24, 3.61it/s] 33%|███▎ | 122810/371472 [9:46:13<19:29:22, 3.54it/s] 33%|███▎ | 122811/371472 [9:46:13<19:06:32, 3.61it/s] 33%|███▎ | 122812/371472 [9:46:13<19:42:16, 3.51it/s] 33%|███▎ | 122813/371472 [9:46:13<20:02:26, 3.45it/s] 33%|███▎ | 122814/371472 [9:46:14<20:28:11, 3.37it/s] 33%|███▎ | 122815/371472 [9:46:14<19:27:34, 3.55it/s] 33%|███▎ | 122816/371472 [9:46:14<19:09:57, 3.60it/s] 33%|███▎ | 122817/371472 [9:46:15<19:12:58, 3.59it/s] 33%|███▎ | 122818/371472 [9:46:15<18:53:33, 3.66it/s] 33%|███▎ | 122819/371472 [9:46:15<19:41:29, 3.51it/s] 33%|███▎ | 122820/371472 [9:46:15<19:20:06, 3.57it/s] {'loss': 3.1993, 'learning_rate': 7.027570083388997e-07, 'epoch': 5.29} + 33%|███▎ | 122820/371472 [9:46:15<19:20:06, 3.57it/s] 33%|███▎ | 122821/371472 [9:46:16<21:52:27, 3.16it/s] 33%|███▎ | 122822/371472 [9:46:16<21:07:51, 3.27it/s] 33%|███▎ | 122823/371472 [9:46:16<21:42:00, 3.18it/s] 33%|███▎ | 122824/371472 [9:46:17<22:03:26, 3.13it/s] 33%|███▎ | 122825/371472 [9:46:17<21:44:22, 3.18it/s] 33%|███▎ | 122826/371472 [9:46:17<21:03:22, 3.28it/s] 33%|███▎ | 122827/371472 [9:46:18<20:25:23, 3.38it/s] 33%|███▎ | 122828/371472 [9:46:18<20:00:29, 3.45it/s] 33%|███▎ | 122829/371472 [9:46:18<20:57:29, 3.30it/s] 33%|███▎ | 122830/371472 [9:46:18<20:34:13, 3.36it/s] 33%|███▎ | 122831/371472 [9:46:19<19:28:39, 3.55it/s] 33%|███▎ | 122832/371472 [9:46:19<19:48:33, 3.49it/s] 33%|███▎ | 122833/371472 [9:46:19<19:55:03, 3.47it/s] 33%|███▎ | 122834/371472 [9:46:20<19:36:01, 3.52it/s] 33%|███▎ | 122835/371472 [9:46:20<19:33:19, 3.53it/s] 33%|███▎ | 122836/371472 [9:46:20<19:05:55, 3.62it/s] 33%|███▎ | 122837/371472 [9:46:20<18:25:36, 3.75it/s] 33%|███▎ | 122838/371472 [9:46:21<18:27:23, 3.74it/s] 33%|███▎ | 122839/371472 [9:46:21<18:08:45, 3.81it/s] 33%|███▎ | 122840/371472 [9:46:21<19:01:36, 3.63it/s] {'loss': 3.348, 'learning_rate': 7.027085263634208e-07, 'epoch': 5.29} + 33%|███▎ | 122840/371472 [9:46:21<19:01:36, 3.63it/s] 33%|███▎ | 122841/371472 [9:46:21<18:58:44, 3.64it/s] 33%|███▎ | 122842/371472 [9:46:22<19:13:37, 3.59it/s] 33%|███▎ | 122843/371472 [9:46:22<21:43:30, 3.18it/s] 33%|███▎ | 122844/371472 [9:46:22<22:03:28, 3.13it/s] 33%|███▎ | 122845/371472 [9:46:23<20:41:56, 3.34it/s] 33%|███▎ | 122846/371472 [9:46:23<20:34:32, 3.36it/s] 33%|███▎ | 122847/371472 [9:46:23<19:34:42, 3.53it/s] 33%|███▎ | 122848/371472 [9:46:24<19:35:07, 3.53it/s] 33%|███▎ | 122849/371472 [9:46:24<20:34:45, 3.36it/s] 33%|███▎ | 122850/371472 [9:46:24<20:27:07, 3.38it/s] 33%|███▎ | 122851/371472 [9:46:24<20:14:55, 3.41it/s] 33%|███▎ | 122852/371472 [9:46:25<20:18:39, 3.40it/s] 33%|███▎ | 122853/371472 [9:46:25<20:04:21, 3.44it/s] 33%|███▎ | 122854/371472 [9:46:25<19:27:06, 3.55it/s] 33%|███▎ | 122855/371472 [9:46:26<18:46:52, 3.68it/s] 33%|███▎ | 122856/371472 [9:46:26<20:45:35, 3.33it/s] 33%|███▎ | 122857/371472 [9:46:26<20:02:42, 3.45it/s] 33%|███▎ | 122858/371472 [9:46:26<19:36:01, 3.52it/s] 33%|███▎ | 122859/371472 [9:46:27<20:03:19, 3.44it/s] 33%|███▎ | 122860/371472 [9:46:27<19:05:30, 3.62it/s] {'loss': 3.2302, 'learning_rate': 7.02660044387942e-07, 'epoch': 5.29} + 33%|███▎ | 122860/371472 [9:46:27<19:05:30, 3.62it/s] 33%|███▎ | 122861/371472 [9:46:27<19:10:09, 3.60it/s] 33%|███▎ | 122862/371472 [9:46:28<19:35:11, 3.53it/s] 33%|███▎ | 122863/371472 [9:46:28<20:15:22, 3.41it/s] 33%|███▎ | 122864/371472 [9:46:28<20:29:16, 3.37it/s] 33%|███▎ | 122865/371472 [9:46:29<20:30:09, 3.37it/s] 33%|███▎ | 122866/371472 [9:46:29<19:52:00, 3.48it/s] 33%|███▎ | 122867/371472 [9:46:29<19:07:51, 3.61it/s] 33%|███▎ | 122868/371472 [9:46:29<19:51:33, 3.48it/s] 33%|███▎ | 122869/371472 [9:46:30<19:36:38, 3.52it/s] 33%|███▎ | 122870/371472 [9:46:30<19:12:16, 3.60it/s] 33%|███▎ | 122871/371472 [9:46:30<18:25:29, 3.75it/s] 33%|███▎ | 122872/371472 [9:46:30<18:43:51, 3.69it/s] 33%|███▎ | 122873/371472 [9:46:31<20:44:36, 3.33it/s] 33%|███▎ | 122874/371472 [9:46:31<19:51:44, 3.48it/s] 33%|███▎ | 122875/371472 [9:46:31<19:08:05, 3.61it/s] 33%|███▎ | 122876/371472 [9:46:32<18:24:05, 3.75it/s] 33%|███▎ | 122877/371472 [9:46:32<18:45:01, 3.68it/s] 33%|███▎ | 122878/371472 [9:46:32<18:56:06, 3.65it/s] 33%|███▎ | 122879/371472 [9:46:32<19:00:59, 3.63it/s] 33%|███▎ | 122880/371472 [9:46:33<18:42:08, 3.69it/s] {'loss': 3.2381, 'learning_rate': 7.026115624124631e-07, 'epoch': 5.29} + 33%|███▎ | 122880/371472 [9:46:33<18:42:08, 3.69it/s] 33%|███▎ | 122881/371472 [9:46:33<18:11:29, 3.80it/s] 33%|███▎ | 122882/371472 [9:46:33<19:02:52, 3.63it/s] 33%|███▎ | 122883/371472 [9:46:33<19:22:43, 3.56it/s] 33%|███▎ | 122884/371472 [9:46:34<19:21:15, 3.57it/s] 33%|███▎ | 122885/371472 [9:46:34<18:30:04, 3.73it/s] 33%|███▎ | 122886/371472 [9:46:34<18:45:22, 3.68it/s] 33%|███▎ | 122887/371472 [9:46:35<18:41:12, 3.70it/s] 33%|███▎ | 122888/371472 [9:46:35<19:20:04, 3.57it/s] 33%|███▎ | 122889/371472 [9:46:35<19:21:42, 3.57it/s] 33%|███▎ | 122890/371472 [9:46:35<20:58:19, 3.29it/s] 33%|███▎ | 122891/371472 [9:46:36<20:19:15, 3.40it/s] 33%|███▎ | 122892/371472 [9:46:36<19:52:47, 3.47it/s] 33%|███▎ | 122893/371472 [9:46:36<18:53:16, 3.66it/s] 33%|███▎ | 122894/371472 [9:46:37<19:09:45, 3.60it/s] 33%|███▎ | 122895/371472 [9:46:37<19:25:34, 3.55it/s] 33%|███▎ | 122896/371472 [9:46:37<20:04:44, 3.44it/s] 33%|███▎ | 122897/371472 [9:46:37<20:08:17, 3.43it/s] 33%|███▎ | 122898/371472 [9:46:38<19:06:16, 3.61it/s] 33%|███▎ | 122899/371472 [9:46:38<18:23:29, 3.75it/s] 33%|███▎ | 122900/371472 [9:46:38<18:19:02, 3.77it/s] {'loss': 3.2901, 'learning_rate': 7.025630804369842e-07, 'epoch': 5.29} + 33%|███▎ | 122900/371472 [9:46:38<18:19:02, 3.77it/s] 33%|███▎ | 122901/371472 [9:46:38<18:43:52, 3.69it/s] 33%|███▎ | 122902/371472 [9:46:39<18:30:55, 3.73it/s] 33%|███▎ | 122903/371472 [9:46:39<18:46:40, 3.68it/s] 33%|███▎ | 122904/371472 [9:46:39<19:00:32, 3.63it/s] 33%|███▎ | 122905/371472 [9:46:40<18:05:11, 3.82it/s] 33%|███▎ | 122906/371472 [9:46:40<18:35:39, 3.71it/s] 33%|███▎ | 122907/371472 [9:46:40<18:35:00, 3.72it/s] 33%|███▎ | 122908/371472 [9:46:40<19:17:50, 3.58it/s] 33%|███▎ | 122909/371472 [9:46:41<18:45:40, 3.68it/s] 33%|███▎ | 122910/371472 [9:46:41<19:42:18, 3.50it/s] 33%|███▎ | 122911/371472 [9:46:41<19:11:07, 3.60it/s] 33%|███▎ | 122912/371472 [9:46:41<18:04:36, 3.82it/s] 33%|███▎ | 122913/371472 [9:46:42<18:13:29, 3.79it/s] 33%|███▎ | 122914/371472 [9:46:42<18:02:02, 3.83it/s] 33%|███▎ | 122915/371472 [9:46:42<18:39:56, 3.70it/s] 33%|███▎ | 122916/371472 [9:46:43<19:17:40, 3.58it/s] 33%|███▎ | 122917/371472 [9:46:43<18:43:56, 3.69it/s] 33%|███▎ | 122918/371472 [9:46:43<18:13:01, 3.79it/s] 33%|███▎ | 122919/371472 [9:46:43<18:24:50, 3.75it/s] 33%|███▎ | 122920/371472 [9:46:44<18:07:16, 3.81it/s] {'loss': 3.189, 'learning_rate': 7.025145984615052e-07, 'epoch': 5.29} + 33%|███▎ | 122920/371472 [9:46:44<18:07:16, 3.81it/s] 33%|███▎ | 122921/371472 [9:46:44<17:59:57, 3.84it/s] 33%|███▎ | 122922/371472 [9:46:44<17:49:30, 3.87it/s] 33%|███▎ | 122923/371472 [9:46:44<17:46:07, 3.89it/s] 33%|███▎ | 122924/371472 [9:46:45<19:13:48, 3.59it/s] 33%|███▎ | 122925/371472 [9:46:45<18:56:29, 3.64it/s] 33%|███▎ | 122926/371472 [9:46:45<19:11:47, 3.60it/s] 33%|███▎ | 122927/371472 [9:46:46<19:51:04, 3.48it/s] 33%|███▎ | 122928/371472 [9:46:46<19:12:28, 3.59it/s] 33%|███▎ | 122929/371472 [9:46:46<20:20:38, 3.39it/s] 33%|███▎ | 122930/371472 [9:46:46<20:22:38, 3.39it/s] 33%|███▎ | 122931/371472 [9:46:47<21:01:07, 3.28it/s] 33%|███▎ | 122932/371472 [9:46:47<21:27:58, 3.22it/s] 33%|███▎ | 122933/371472 [9:46:47<20:58:30, 3.29it/s] 33%|███▎ | 122934/371472 [9:46:48<21:20:49, 3.23it/s] 33%|███▎ | 122935/371472 [9:46:48<19:51:37, 3.48it/s] 33%|███▎ | 122936/371472 [9:46:48<19:25:39, 3.55it/s] 33%|███▎ | 122937/371472 [9:46:48<18:34:34, 3.72it/s] 33%|███▎ | 122938/371472 [9:46:49<17:53:29, 3.86it/s] 33%|███▎ | 122939/371472 [9:46:49<17:45:30, 3.89it/s] 33%|███▎ | 122940/371472 [9:46:49<19:54:38, 3.47it/s] {'loss': 3.3819, 'learning_rate': 7.024661164860264e-07, 'epoch': 5.3} + 33%|███▎ | 122940/371472 [9:46:49<19:54:38, 3.47it/s] 33%|███▎ | 122941/371472 [9:46:50<19:04:14, 3.62it/s] 33%|███▎ | 122942/371472 [9:46:50<18:32:34, 3.72it/s] 33%|███▎ | 122943/371472 [9:46:50<18:57:25, 3.64it/s] 33%|███▎ | 122944/371472 [9:46:50<18:48:07, 3.67it/s] 33%|███▎ | 122945/371472 [9:46:51<19:01:50, 3.63it/s] 33%|███▎ | 122946/371472 [9:46:51<18:29:39, 3.73it/s] 33%|███▎ | 122947/371472 [9:46:51<18:31:38, 3.73it/s] 33%|███▎ | 122948/371472 [9:46:51<19:23:25, 3.56it/s] 33%|███▎ | 122949/371472 [9:46:52<20:03:54, 3.44it/s] 33%|███▎ | 122950/371472 [9:46:52<20:40:12, 3.34it/s] 33%|███▎ | 122951/371472 [9:46:52<20:04:58, 3.44it/s] 33%|███▎ | 122952/371472 [9:46:53<18:59:15, 3.64it/s] 33%|███▎ | 122953/371472 [9:46:53<18:47:02, 3.68it/s] 33%|███▎ | 122954/371472 [9:46:53<19:40:06, 3.51it/s] 33%|███▎ | 122955/371472 [9:46:53<19:15:27, 3.58it/s] 33%|███▎ | 122956/371472 [9:46:54<18:46:22, 3.68it/s] 33%|███▎ | 122957/371472 [9:46:54<18:24:51, 3.75it/s] 33%|███▎ | 122958/371472 [9:46:54<20:36:52, 3.35it/s] 33%|███▎ | 122959/371472 [9:46:55<21:11:49, 3.26it/s] 33%|███▎ | 122960/371472 [9:46:55<20:19:21, 3.40it/s] {'loss': 3.2116, 'learning_rate': 7.024176345105475e-07, 'epoch': 5.3} + 33%|███▎ | 122960/371472 [9:46:55<20:19:21, 3.40it/s] 33%|███▎ | 122961/371472 [9:46:55<19:50:00, 3.48it/s] 33%|███▎ | 122962/371472 [9:46:55<20:11:26, 3.42it/s] 33%|███▎ | 122963/371472 [9:46:56<20:51:13, 3.31it/s] 33%|███▎ | 122964/371472 [9:46:56<20:24:08, 3.38it/s] 33%|███▎ | 122965/371472 [9:46:56<19:34:56, 3.53it/s] 33%|███▎ | 122966/371472 [9:46:57<19:24:09, 3.56it/s] 33%|███▎ | 122967/371472 [9:46:57<19:51:47, 3.48it/s] 33%|███▎ | 122968/371472 [9:46:57<19:31:51, 3.53it/s] 33%|███▎ | 122969/371472 [9:46:58<20:39:52, 3.34it/s] 33%|███▎ | 122970/371472 [9:46:58<19:57:51, 3.46it/s] 33%|███▎ | 122971/371472 [9:46:58<20:01:51, 3.45it/s] 33%|███▎ | 122972/371472 [9:46:58<21:19:37, 3.24it/s] 33%|███▎ | 122973/371472 [9:46:59<20:14:07, 3.41it/s] 33%|███▎ | 122974/371472 [9:46:59<20:23:14, 3.39it/s] 33%|███▎ | 122975/371472 [9:46:59<19:23:11, 3.56it/s] 33%|███▎ | 122976/371472 [9:47:00<19:17:28, 3.58it/s] 33%|███▎ | 122977/371472 [9:47:00<20:20:20, 3.39it/s] 33%|███▎ | 122978/371472 [9:47:00<19:42:25, 3.50it/s] 33%|███▎ | 122979/371472 [9:47:00<20:37:16, 3.35it/s] 33%|███▎ | 122980/371472 [9:47:01<19:46:22, 3.49it/s] {'loss': 3.2211, 'learning_rate': 7.023691525350685e-07, 'epoch': 5.3} + 33%|███▎ | 122980/371472 [9:47:01<19:46:22, 3.49it/s] 33%|███▎ | 122981/371472 [9:47:01<18:52:18, 3.66it/s] 33%|███▎ | 122982/371472 [9:47:01<18:29:05, 3.73it/s] 33%|███▎ | 122983/371472 [9:47:01<18:39:37, 3.70it/s] 33%|███▎ | 122984/371472 [9:47:02<18:28:16, 3.74it/s] 33%|███▎ | 122985/371472 [9:47:02<18:19:34, 3.77it/s] 33%|███▎ | 122986/371472 [9:47:02<18:23:04, 3.75it/s] 33%|███▎ | 122987/371472 [9:47:03<18:06:32, 3.81it/s] 33%|███▎ | 122988/371472 [9:47:03<18:37:03, 3.71it/s] 33%|███▎ | 122989/371472 [9:47:03<19:18:58, 3.57it/s] 33%|███▎ | 122990/371472 [9:47:03<19:01:05, 3.63it/s] 33%|███▎ | 122991/371472 [9:47:04<18:27:17, 3.74it/s] 33%|███▎ | 122992/371472 [9:47:04<18:48:45, 3.67it/s] 33%|███▎ | 122993/371472 [9:47:04<19:11:54, 3.60it/s] 33%|███▎ | 122994/371472 [9:47:04<18:27:24, 3.74it/s] 33%|███▎ | 122995/371472 [9:47:05<18:36:39, 3.71it/s] 33%|███▎ | 122996/371472 [9:47:05<19:06:37, 3.61it/s] 33%|███▎ | 122997/371472 [9:47:05<18:51:30, 3.66it/s] 33%|███▎ | 122998/371472 [9:47:06<19:57:16, 3.46it/s] 33%|███▎ | 122999/371472 [9:47:06<19:52:36, 3.47it/s] 33%|███▎ | 123000/371472 [9:47:06<19:42:15, 3.50it/s] {'loss': 3.3478, 'learning_rate': 7.023206705595897e-07, 'epoch': 5.3} + 33%|███▎ | 123000/371472 [9:47:06<19:42:15, 3.50it/s] 33%|███▎ | 123001/371472 [9:47:06<19:25:33, 3.55it/s] 33%|███▎ | 123002/371472 [9:47:07<19:23:55, 3.56it/s] 33%|███▎ | 123003/371472 [9:47:07<18:46:12, 3.68it/s] 33%|███▎ | 123004/371472 [9:47:07<19:32:22, 3.53it/s] 33%|███▎ | 123005/371472 [9:47:08<19:02:47, 3.62it/s] 33%|███▎ | 123006/371472 [9:47:08<18:34:56, 3.71it/s] 33%|███▎ | 123007/371472 [9:47:08<19:14:18, 3.59it/s] 33%|███▎ | 123008/371472 [9:47:08<19:09:51, 3.60it/s] 33%|███▎ | 123009/371472 [9:47:09<18:44:41, 3.68it/s] 33%|███▎ | 123010/371472 [9:47:09<18:56:14, 3.64it/s] 33%|███▎ | 123011/371472 [9:47:09<20:13:21, 3.41it/s] 33%|███▎ | 123012/371472 [9:47:10<20:36:09, 3.35it/s] 33%|███▎ | 123013/371472 [9:47:10<19:30:59, 3.54it/s] 33%|███▎ | 123014/371472 [9:47:10<19:30:56, 3.54it/s] 33%|███▎ | 123015/371472 [9:47:10<20:49:23, 3.31it/s] 33%|███▎ | 123016/371472 [9:47:11<19:35:41, 3.52it/s] 33%|███▎ | 123017/371472 [9:47:11<18:57:25, 3.64it/s] 33%|███▎ | 123018/371472 [9:47:11<18:40:22, 3.70it/s] 33%|███▎ | 123019/371472 [9:47:11<18:43:32, 3.69it/s] 33%|███▎ | 123020/371472 [9:47:12<18:25:26, 3.75it/s] {'loss': 3.1674, 'learning_rate': 7.022721885841109e-07, 'epoch': 5.3} + 33%|███▎ | 123020/371472 [9:47:12<18:25:26, 3.75it/s] 33%|███▎ | 123021/371472 [9:47:12<17:45:39, 3.89it/s] 33%|███▎ | 123022/371472 [9:47:12<18:37:30, 3.71it/s] 33%|███▎ | 123023/371472 [9:47:13<18:18:07, 3.77it/s] 33%|███▎ | 123024/371472 [9:47:13<17:53:46, 3.86it/s] 33%|███▎ | 123025/371472 [9:47:13<17:36:25, 3.92it/s] 33%|███▎ | 123026/371472 [9:47:13<18:26:50, 3.74it/s] 33%|███▎ | 123027/371472 [9:47:14<17:53:41, 3.86it/s] 33%|███▎ | 123028/371472 [9:47:14<18:31:38, 3.72it/s] 33%|███▎ | 123029/371472 [9:47:14<19:23:48, 3.56it/s] 33%|███▎ | 123030/371472 [9:47:14<19:23:30, 3.56it/s] 33%|███▎ | 123031/371472 [9:47:15<19:18:28, 3.57it/s] 33%|███▎ | 123032/371472 [9:47:15<18:39:00, 3.70it/s] 33%|███▎ | 123033/371472 [9:47:15<19:02:20, 3.62it/s] 33%|███▎ | 123034/371472 [9:47:16<19:04:26, 3.62it/s] 33%|███▎ | 123035/371472 [9:47:16<19:58:25, 3.46it/s] 33%|███▎ | 123036/371472 [9:47:16<19:20:52, 3.57it/s] 33%|███▎ | 123037/371472 [9:47:16<19:28:54, 3.54it/s] 33%|███▎ | 123038/371472 [9:47:17<19:10:48, 3.60it/s] 33%|███▎ | 123039/371472 [9:47:17<19:07:41, 3.61it/s] 33%|███▎ | 123040/371472 [9:47:17<18:30:45, 3.73it/s] {'loss': 3.3267, 'learning_rate': 7.022237066086319e-07, 'epoch': 5.3} + 33%|███▎ | 123040/371472 [9:47:17<18:30:45, 3.73it/s] 33%|███▎ | 123041/371472 [9:47:17<18:12:07, 3.79it/s] 33%|███▎ | 123042/371472 [9:47:18<19:13:09, 3.59it/s] 33%|███▎ | 123043/371472 [9:47:18<18:42:40, 3.69it/s] 33%|███▎ | 123044/371472 [9:47:18<18:41:27, 3.69it/s] 33%|███▎ | 123045/371472 [9:47:19<18:03:39, 3.82it/s] 33%|███▎ | 123046/371472 [9:47:19<18:13:02, 3.79it/s] 33%|███▎ | 123047/371472 [9:47:19<17:55:07, 3.85it/s] 33%|███▎ | 123048/371472 [9:47:19<18:28:26, 3.74it/s] 33%|███▎ | 123049/371472 [9:47:20<19:58:57, 3.45it/s] 33%|███▎ | 123050/371472 [9:47:20<19:25:46, 3.55it/s] 33%|███▎ | 123051/371472 [9:47:20<19:39:25, 3.51it/s] 33%|███▎ | 123052/371472 [9:47:20<19:22:52, 3.56it/s] 33%|███▎ | 123053/371472 [9:47:21<19:47:47, 3.49it/s] 33%|███▎ | 123054/371472 [9:47:21<19:14:02, 3.59it/s] 33%|███▎ | 123055/371472 [9:47:21<19:35:28, 3.52it/s] 33%|███▎ | 123056/371472 [9:47:22<19:55:23, 3.46it/s] 33%|███▎ | 123057/371472 [9:47:22<19:29:23, 3.54it/s] 33%|███▎ | 123058/371472 [9:47:22<19:40:25, 3.51it/s] 33%|███▎ | 123059/371472 [9:47:23<21:09:52, 3.26it/s] 33%|███▎ | 123060/371472 [9:47:23<21:10:28, 3.26it/s] {'loss': 3.2083, 'learning_rate': 7.021752246331529e-07, 'epoch': 5.3} + 33%|███▎ | 123060/371472 [9:47:23<21:10:28, 3.26it/s] 33%|███▎ | 123061/371472 [9:47:23<20:32:33, 3.36it/s] 33%|███▎ | 123062/371472 [9:47:23<19:47:09, 3.49it/s] 33%|███▎ | 123063/371472 [9:47:24<19:19:32, 3.57it/s] 33%|███▎ | 123064/371472 [9:47:24<18:52:04, 3.66it/s] 33%|███▎ | 123065/371472 [9:47:24<20:46:26, 3.32it/s] 33%|███▎ | 123066/371472 [9:47:25<21:23:55, 3.22it/s] 33%|███▎ | 123067/371472 [9:47:25<20:32:00, 3.36it/s] 33%|███▎ | 123068/371472 [9:47:25<19:44:41, 3.49it/s] 33%|███▎ | 123069/371472 [9:47:25<19:02:31, 3.62it/s] 33%|███▎ | 123070/371472 [9:47:26<18:39:51, 3.70it/s] 33%|███▎ | 123071/371472 [9:47:26<18:48:29, 3.67it/s] 33%|███▎ | 123072/371472 [9:47:26<19:18:41, 3.57it/s] 33%|███▎ | 123073/371472 [9:47:27<20:28:54, 3.37it/s] 33%|███▎ | 123074/371472 [9:47:27<19:39:05, 3.51it/s] 33%|███▎ | 123075/371472 [9:47:27<19:29:15, 3.54it/s] 33%|███▎ | 123076/371472 [9:47:27<19:26:18, 3.55it/s] 33%|███▎ | 123077/371472 [9:47:28<20:16:36, 3.40it/s] 33%|███▎ | 123078/371472 [9:47:28<21:06:50, 3.27it/s] 33%|███▎ | 123079/371472 [9:47:28<21:15:02, 3.25it/s] 33%|███▎ | 123080/371472 [9:47:29<20:55:36, 3.30it/s] {'loss': 3.1486, 'learning_rate': 7.021267426576741e-07, 'epoch': 5.3} + 33%|███▎ | 123080/371472 [9:47:29<20:55:36, 3.30it/s] 33%|███▎ | 123081/371472 [9:47:29<21:26:46, 3.22it/s] 33%|███▎ | 123082/371472 [9:47:29<20:46:43, 3.32it/s] 33%|███▎ | 123083/371472 [9:47:30<20:27:17, 3.37it/s] 33%|███▎ | 123084/371472 [9:47:30<20:01:53, 3.44it/s] 33%|███▎ | 123085/371472 [9:47:30<20:14:00, 3.41it/s] 33%|███▎ | 123086/371472 [9:47:30<20:12:41, 3.41it/s] 33%|███▎ | 123087/371472 [9:47:31<20:09:14, 3.42it/s] 33%|███▎ | 123088/371472 [9:47:31<21:35:14, 3.20it/s] 33%|███▎ | 123089/371472 [9:47:31<20:47:18, 3.32it/s] 33%|███▎ | 123090/371472 [9:47:32<20:21:48, 3.39it/s] 33%|███▎ | 123091/371472 [9:47:32<20:19:56, 3.39it/s] 33%|███▎ | 123092/371472 [9:47:32<19:42:33, 3.50it/s] 33%|███▎ | 123093/371472 [9:47:32<19:36:22, 3.52it/s] 33%|███▎ | 123094/371472 [9:47:33<19:58:04, 3.46it/s] 33%|███▎ | 123095/371472 [9:47:33<18:50:01, 3.66it/s] 33%|███▎ | 123096/371472 [9:47:33<20:31:07, 3.36it/s] 33%|███▎ | 123097/371472 [9:47:34<20:04:17, 3.44it/s] 33%|███▎ | 123098/371472 [9:47:34<19:59:25, 3.45it/s] 33%|███▎ | 123099/371472 [9:47:34<21:17:01, 3.24it/s] 33%|███▎ | 123100/371472 [9:47:35<21:16:49, 3.24it/s] {'loss': 3.1825, 'learning_rate': 7.020782606821953e-07, 'epoch': 5.3} + 33%|███▎ | 123100/371472 [9:47:35<21:16:49, 3.24it/s] 33%|███▎ | 123101/371472 [9:47:35<20:54:45, 3.30it/s] 33%|███▎ | 123102/371472 [9:47:35<20:12:18, 3.41it/s] 33%|███▎ | 123103/371472 [9:47:35<19:46:08, 3.49it/s] 33%|███▎ | 123104/371472 [9:47:36<19:22:59, 3.56it/s] 33%|███▎ | 123105/371472 [9:47:36<19:43:04, 3.50it/s] 33%|███▎ | 123106/371472 [9:47:36<19:45:07, 3.49it/s] 33%|███▎ | 123107/371472 [9:47:36<19:05:26, 3.61it/s] 33%|███▎ | 123108/371472 [9:47:37<19:38:38, 3.51it/s] 33%|███▎ | 123109/371472 [9:47:37<19:42:50, 3.50it/s] 33%|███▎ | 123110/371472 [9:47:37<20:41:09, 3.34it/s] 33%|███▎ | 123111/371472 [9:47:38<21:10:21, 3.26it/s] 33%|███▎ | 123112/371472 [9:47:38<20:35:32, 3.35it/s] 33%|███▎ | 123113/371472 [9:47:38<20:16:32, 3.40it/s] 33%|███▎ | 123114/371472 [9:47:39<20:52:11, 3.31it/s] 33%|███▎ | 123115/371472 [9:47:39<20:23:55, 3.38it/s] 33%|███▎ | 123116/371472 [9:47:39<20:04:41, 3.44it/s] 33%|███▎ | 123117/371472 [9:47:39<19:17:39, 3.58it/s] 33%|███▎ | 123118/371472 [9:47:40<18:43:39, 3.68it/s] 33%|███▎ | 123119/371472 [9:47:40<18:12:37, 3.79it/s] 33%|███▎ | 123120/371472 [9:47:40<18:24:43, 3.75it/s] {'loss': 3.4227, 'learning_rate': 7.020297787067164e-07, 'epoch': 5.3} + 33%|███▎ | 123120/371472 [9:47:40<18:24:43, 3.75it/s] 33%|███▎ | 123121/371472 [9:47:41<19:19:26, 3.57it/s] 33%|███▎ | 123122/371472 [9:47:41<19:10:57, 3.60it/s] 33%|███▎ | 123123/371472 [9:47:41<21:46:36, 3.17it/s] 33%|███▎ | 123124/371472 [9:47:41<21:03:17, 3.28it/s] 33%|███▎ | 123125/371472 [9:47:42<20:17:17, 3.40it/s] 33%|███▎ | 123126/371472 [9:47:42<20:06:09, 3.43it/s] 33%|███▎ | 123127/371472 [9:47:42<18:55:51, 3.64it/s] 33%|███▎ | 123128/371472 [9:47:43<18:04:08, 3.82it/s] 33%|███▎ | 123129/371472 [9:47:43<17:49:18, 3.87it/s] 33%|███▎ | 123130/371472 [9:47:43<18:14:05, 3.78it/s] 33%|███▎ | 123131/371472 [9:47:43<20:24:07, 3.38it/s] 33%|███▎ | 123132/371472 [9:47:44<20:39:35, 3.34it/s] 33%|███▎ | 123133/371472 [9:47:44<20:34:13, 3.35it/s] 33%|███▎ | 123134/371472 [9:47:44<20:07:52, 3.43it/s] 33%|███▎ | 123135/371472 [9:47:45<19:19:32, 3.57it/s] 33%|███▎ | 123136/371472 [9:47:45<19:44:20, 3.49it/s] 33%|███▎ | 123137/371472 [9:47:45<23:30:36, 2.93it/s] 33%|███▎ | 123138/371472 [9:47:46<21:40:50, 3.18it/s] 33%|███▎ | 123139/371472 [9:47:46<21:06:22, 3.27it/s] 33%|███▎ | 123140/371472 [9:47:46<20:07:18, 3.43it/s] {'loss': 3.102, 'learning_rate': 7.019812967312374e-07, 'epoch': 5.3} + 33%|███▎ | 123140/371472 [9:47:46<20:07:18, 3.43it/s] 33%|███▎ | 123141/371472 [9:47:46<19:03:41, 3.62it/s] 33%|███▎ | 123142/371472 [9:47:47<19:08:44, 3.60it/s] 33%|███▎ | 123143/371472 [9:47:47<19:08:42, 3.60it/s] 33%|███▎ | 123144/371472 [9:47:47<18:27:31, 3.74it/s] 33%|███▎ | 123145/371472 [9:47:47<18:53:27, 3.65it/s] 33%|███▎ | 123146/371472 [9:47:48<18:46:31, 3.67it/s] 33%|███▎ | 123147/371472 [9:47:48<18:27:47, 3.74it/s] 33%|███▎ | 123148/371472 [9:47:48<18:51:26, 3.66it/s] 33%|███▎ | 123149/371472 [9:47:49<19:30:19, 3.54it/s] 33%|███▎ | 123150/371472 [9:47:49<20:09:02, 3.42it/s] 33%|███▎ | 123151/371472 [9:47:49<20:28:36, 3.37it/s] 33%|███▎ | 123152/371472 [9:47:49<20:13:47, 3.41it/s] 33%|███▎ | 123153/371472 [9:47:50<18:53:38, 3.65it/s] 33%|███▎ | 123154/371472 [9:47:50<21:13:45, 3.25it/s] 33%|███▎ | 123155/371472 [9:47:50<20:04:54, 3.43it/s] 33%|███▎ | 123156/371472 [9:47:51<19:14:44, 3.58it/s] 33%|███▎ | 123157/371472 [9:47:51<18:54:50, 3.65it/s] 33%|███▎ | 123158/371472 [9:47:51<19:17:02, 3.58it/s] 33%|███▎ | 123159/371472 [9:47:51<20:25:10, 3.38it/s] 33%|███▎ | 123160/371472 [9:47:52<19:13:23, 3.59it/s] {'loss': 3.1699, 'learning_rate': 7.019328147557586e-07, 'epoch': 5.3} + 33%|███▎ | 123160/371472 [9:47:52<19:13:23, 3.59it/s] 33%|███▎ | 123161/371472 [9:47:52<18:34:33, 3.71it/s] 33%|███▎ | 123162/371472 [9:47:52<18:55:19, 3.65it/s] 33%|███▎ | 123163/371472 [9:47:52<18:23:58, 3.75it/s] 33%|███▎ | 123164/371472 [9:47:53<19:03:35, 3.62it/s] 33%|███▎ | 123165/371472 [9:47:53<19:13:36, 3.59it/s] 33%|███▎ | 123166/371472 [9:47:53<18:30:13, 3.73it/s] 33%|███▎ | 123167/371472 [9:47:54<19:05:35, 3.61it/s] 33%|███▎ | 123168/371472 [9:47:54<19:15:42, 3.58it/s] 33%|███▎ | 123169/371472 [9:47:54<19:17:25, 3.58it/s] 33%|███▎ | 123170/371472 [9:47:54<19:26:49, 3.55it/s] 33%|███▎ | 123171/371472 [9:47:55<18:54:11, 3.65it/s] 33%|███▎ | 123172/371472 [9:47:55<18:26:50, 3.74it/s] 33%|███▎ | 123173/371472 [9:47:55<18:47:03, 3.67it/s] 33%|███▎ | 123174/371472 [9:47:56<19:16:27, 3.58it/s] 33%|███▎ | 123175/371472 [9:47:56<20:31:50, 3.36it/s] 33%|███▎ | 123176/371472 [9:47:56<20:24:03, 3.38it/s] 33%|███▎ | 123177/371472 [9:47:57<21:02:51, 3.28it/s] 33%|███▎ | 123178/371472 [9:47:57<20:07:59, 3.43it/s] 33%|███▎ | 123179/371472 [9:47:57<20:44:07, 3.33it/s] 33%|███▎ | 123180/371472 [9:47:57<20:29:11, 3.37it/s] {'loss': 3.2191, 'learning_rate': 7.018843327802797e-07, 'epoch': 5.31} + 33%|███▎ | 123180/371472 [9:47:57<20:29:11, 3.37it/s] 33%|███▎ | 123181/371472 [9:47:58<19:41:17, 3.50it/s] 33%|███▎ | 123182/371472 [9:47:58<19:07:47, 3.61it/s] 33%|███▎ | 123183/371472 [9:47:58<19:38:08, 3.51it/s] 33%|███▎ | 123184/371472 [9:47:58<19:21:57, 3.56it/s] 33%|███▎ | 123185/371472 [9:47:59<20:30:43, 3.36it/s] 33%|███▎ | 123186/371472 [9:47:59<20:08:09, 3.43it/s] 33%|███▎ | 123187/371472 [9:47:59<20:13:29, 3.41it/s] 33%|███▎ | 123188/371472 [9:48:00<19:58:30, 3.45it/s] 33%|███▎ | 123189/371472 [9:48:00<19:17:37, 3.57it/s] 33%|███▎ | 123190/371472 [9:48:00<19:26:59, 3.55it/s] 33%|███▎ | 123191/371472 [9:48:01<19:50:46, 3.48it/s] 33%|███▎ | 123192/371472 [9:48:01<19:19:13, 3.57it/s] 33%|███▎ | 123193/371472 [9:48:01<18:36:51, 3.70it/s] 33%|███▎ | 123194/371472 [9:48:01<17:45:42, 3.88it/s] 33%|███▎ | 123195/371472 [9:48:02<19:05:01, 3.61it/s] 33%|███▎ | 123196/371472 [9:48:02<19:27:34, 3.54it/s] 33%|███▎ | 123197/371472 [9:48:02<19:24:36, 3.55it/s] 33%|███▎ | 123198/371472 [9:48:02<19:00:58, 3.63it/s] 33%|███▎ | 123199/371472 [9:48:03<18:22:42, 3.75it/s] 33%|███▎ | 123200/371472 [9:48:03<19:36:35, 3.52it/s] {'loss': 3.3449, 'learning_rate': 7.018358508048008e-07, 'epoch': 5.31} + 33%|███▎ | 123200/371472 [9:48:03<19:36:35, 3.52it/s] 33%|███▎ | 123201/371472 [9:48:03<20:02:25, 3.44it/s] 33%|███▎ | 123202/371472 [9:48:04<19:17:20, 3.58it/s] 33%|███▎ | 123203/371472 [9:48:04<18:30:07, 3.73it/s] 33%|███▎ | 123204/371472 [9:48:04<18:12:50, 3.79it/s] 33%|███▎ | 123205/371472 [9:48:04<17:38:55, 3.91it/s] 33%|███▎ | 123206/371472 [9:48:05<19:35:12, 3.52it/s] 33%|███▎ | 123207/371472 [9:48:05<19:01:56, 3.62it/s] 33%|███▎ | 123208/371472 [9:48:05<19:07:24, 3.61it/s] 33%|███▎ | 123209/371472 [9:48:05<19:10:10, 3.60it/s] 33%|███▎ | 123210/371472 [9:48:06<20:12:53, 3.41it/s] 33%|███▎ | 123211/371472 [9:48:06<19:24:51, 3.55it/s] 33%|███▎ | 123212/371472 [9:48:06<19:13:18, 3.59it/s] 33%|███▎ | 123213/371472 [9:48:07<19:29:14, 3.54it/s] 33%|███▎ | 123214/371472 [9:48:07<19:30:45, 3.53it/s] 33%|███▎ | 123215/371472 [9:48:07<19:25:34, 3.55it/s] 33%|███▎ | 123216/371472 [9:48:07<18:36:51, 3.70it/s] 33%|███▎ | 123217/371472 [9:48:08<18:45:55, 3.67it/s] 33%|███▎ | 123218/371472 [9:48:08<18:55:33, 3.64it/s] 33%|███▎ | 123219/371472 [9:48:08<19:08:39, 3.60it/s] 33%|███▎ | 123220/371472 [9:48:09<19:37:01, 3.52it/s] {'loss': 3.3355, 'learning_rate': 7.017873688293218e-07, 'epoch': 5.31} + 33%|███▎ | 123220/371472 [9:48:09<19:37:01, 3.52it/s] 33%|███▎ | 123221/371472 [9:48:09<19:56:24, 3.46it/s] 33%|███▎ | 123222/371472 [9:48:09<19:28:29, 3.54it/s] 33%|███▎ | 123223/371472 [9:48:09<19:55:43, 3.46it/s] 33%|███▎ | 123224/371472 [9:48:10<19:21:11, 3.56it/s] 33%|███▎ | 123225/371472 [9:48:10<19:24:18, 3.55it/s] 33%|███▎ | 123226/371472 [9:48:10<18:18:40, 3.77it/s] 33%|███▎ | 123227/371472 [9:48:10<18:06:13, 3.81it/s] 33%|███▎ | 123228/371472 [9:48:11<19:05:18, 3.61it/s] 33%|███▎ | 123229/371472 [9:48:11<18:45:25, 3.68it/s] 33%|███▎ | 123230/371472 [9:48:11<18:04:38, 3.81it/s] 33%|███▎ | 123231/371472 [9:48:12<18:40:00, 3.69it/s] 33%|███▎ | 123232/371472 [9:48:12<18:24:35, 3.75it/s] 33%|███▎ | 123233/371472 [9:48:12<19:40:41, 3.50it/s] 33%|███▎ | 123234/371472 [9:48:12<20:45:32, 3.32it/s] 33%|███▎ | 123235/371472 [9:48:13<20:14:43, 3.41it/s] 33%|███▎ | 123236/371472 [9:48:13<20:03:07, 3.44it/s] 33%|███▎ | 123237/371472 [9:48:13<21:08:34, 3.26it/s] 33%|███▎ | 123238/371472 [9:48:14<20:28:13, 3.37it/s] 33%|███▎ | 123239/371472 [9:48:14<20:02:47, 3.44it/s] 33%|███▎ | 123240/371472 [9:48:14<20:16:03, 3.40it/s] {'loss': 3.2185, 'learning_rate': 7.01738886853843e-07, 'epoch': 5.31} + 33%|███▎ | 123240/371472 [9:48:14<20:16:03, 3.40it/s] 33%|███▎ | 123241/371472 [9:48:15<20:42:34, 3.33it/s] 33%|███▎ | 123242/371472 [9:48:15<19:43:50, 3.49it/s] 33%|███▎ | 123243/371472 [9:48:15<18:54:46, 3.65it/s] 33%|███▎ | 123244/371472 [9:48:15<18:30:52, 3.72it/s] 33%|███▎ | 123245/371472 [9:48:16<19:00:48, 3.63it/s] 33%|███▎ | 123246/371472 [9:48:16<18:44:01, 3.68it/s] 33%|███▎ | 123247/371472 [9:48:16<18:46:36, 3.67it/s] 33%|███▎ | 123248/371472 [9:48:16<18:20:58, 3.76it/s] 33%|███▎ | 123249/371472 [9:48:17<20:25:37, 3.38it/s] 33%|███▎ | 123250/371472 [9:48:17<20:13:59, 3.41it/s] 33%|███▎ | 123251/371472 [9:48:17<19:33:58, 3.52it/s] 33%|███▎ | 123252/371472 [9:48:18<19:21:05, 3.56it/s] 33%|███▎ | 123253/371472 [9:48:18<18:54:33, 3.65it/s] 33%|███▎ | 123254/371472 [9:48:18<18:07:17, 3.80it/s] 33%|███▎ | 123255/371472 [9:48:18<18:34:02, 3.71it/s] 33%|███▎ | 123256/371472 [9:48:19<19:00:27, 3.63it/s] 33%|███▎ | 123257/371472 [9:48:19<18:25:33, 3.74it/s] 33%|███▎ | 123258/371472 [9:48:19<18:29:11, 3.73it/s] 33%|███▎ | 123259/371472 [9:48:19<18:16:23, 3.77it/s] 33%|███▎ | 123260/371472 [9:48:20<20:25:06, 3.38it/s] {'loss': 3.1446, 'learning_rate': 7.016904048783642e-07, 'epoch': 5.31} + 33%|███▎ | 123260/371472 [9:48:20<20:25:06, 3.38it/s] 33%|███▎ | 123261/371472 [9:48:20<20:11:59, 3.41it/s] 33%|███▎ | 123262/371472 [9:48:20<21:06:43, 3.27it/s] 33%|███▎ | 123263/371472 [9:48:21<20:10:31, 3.42it/s] 33%|███▎ | 123264/371472 [9:48:21<19:54:07, 3.46it/s] 33%|███▎ | 123265/371472 [9:48:21<20:52:00, 3.30it/s] 33%|███▎ | 123266/371472 [9:48:22<20:45:12, 3.32it/s] 33%|███▎ | 123267/371472 [9:48:22<19:48:37, 3.48it/s] 33%|███▎ | 123268/371472 [9:48:22<20:06:49, 3.43it/s] 33%|███▎ | 123269/371472 [9:48:22<20:00:07, 3.45it/s] 33%|███▎ | 123270/371472 [9:48:23<19:47:22, 3.48it/s] 33%|███▎ | 123271/371472 [9:48:23<20:56:32, 3.29it/s] 33%|███▎ | 123272/371472 [9:48:23<21:49:39, 3.16it/s] 33%|███▎ | 123273/371472 [9:48:24<22:55:24, 3.01it/s] 33%|███▎ | 123274/371472 [9:48:24<22:20:36, 3.09it/s] 33%|███▎ | 123275/371472 [9:48:24<23:01:45, 2.99it/s] 33%|███▎ | 123276/371472 [9:48:25<22:30:31, 3.06it/s] 33%|███▎ | 123277/371472 [9:48:25<22:36:35, 3.05it/s] 33%|███▎ | 123278/371472 [9:48:25<21:56:18, 3.14it/s] 33%|███▎ | 123279/371472 [9:48:26<22:35:55, 3.05it/s] 33%|███▎ | 123280/371472 [9:48:26<22:39:04, 3.04it/s] {'loss': 3.237, 'learning_rate': 7.016419229028853e-07, 'epoch': 5.31} + 33%|███▎ | 123280/371472 [9:48:26<22:39:04, 3.04it/s] 33%|███▎ | 123281/371472 [9:48:26<20:48:49, 3.31it/s] 33%|███▎ | 123282/371472 [9:48:27<19:41:10, 3.50it/s] 33%|███▎ | 123283/371472 [9:48:27<19:48:43, 3.48it/s] 33%|███▎ | 123284/371472 [9:48:27<20:36:58, 3.34it/s] 33%|███▎ | 123285/371472 [9:48:27<19:07:17, 3.61it/s] 33%|███▎ | 123286/371472 [9:48:28<19:02:08, 3.62it/s] 33%|███▎ | 123287/371472 [9:48:28<18:39:19, 3.70it/s] 33%|███▎ | 123288/371472 [9:48:28<18:57:05, 3.64it/s] 33%|███▎ | 123289/371472 [9:48:28<18:22:37, 3.75it/s] 33%|███▎ | 123290/371472 [9:48:29<20:00:04, 3.45it/s] 33%|███▎ | 123291/371472 [9:48:29<19:20:19, 3.56it/s] 33%|███▎ | 123292/371472 [9:48:29<19:13:56, 3.58it/s] 33%|███▎ | 123293/371472 [9:48:30<18:38:14, 3.70it/s] 33%|███▎ | 123294/371472 [9:48:30<18:24:37, 3.74it/s] 33%|███▎ | 123295/371472 [9:48:30<18:23:15, 3.75it/s] 33%|███▎ | 123296/371472 [9:48:30<18:25:03, 3.74it/s] 33%|███▎ | 123297/371472 [9:48:31<19:31:47, 3.53it/s] 33%|███▎ | 123298/371472 [9:48:31<20:40:58, 3.33it/s] 33%|███▎ | 123299/371472 [9:48:31<21:23:12, 3.22it/s] 33%|███▎ | 123300/371472 [9:48:32<21:59:08, 3.14it/s] {'loss': 3.2808, 'learning_rate': 7.015934409274062e-07, 'epoch': 5.31} + 33%|███▎ | 123300/371472 [9:48:32<21:59:08, 3.14it/s] 33%|███▎ | 123301/371472 [9:48:32<21:55:52, 3.14it/s] 33%|███▎ | 123302/371472 [9:48:32<20:30:40, 3.36it/s] 33%|███▎ | 123303/371472 [9:48:32<19:28:07, 3.54it/s] 33%|███▎ | 123304/371472 [9:48:33<19:04:06, 3.62it/s] 33%|███▎ | 123305/371472 [9:48:33<18:48:08, 3.67it/s] 33%|███▎ | 123306/371472 [9:48:33<18:37:03, 3.70it/s] 33%|███▎ | 123307/371472 [9:48:34<18:32:07, 3.72it/s] 33%|███▎ | 123308/371472 [9:48:34<17:58:44, 3.83it/s] 33%|███▎ | 123309/371472 [9:48:34<19:19:12, 3.57it/s] 33%|███▎ | 123310/371472 [9:48:34<19:13:43, 3.58it/s] 33%|███▎ | 123311/371472 [9:48:35<21:13:08, 3.25it/s] 33%|███▎ | 123312/371472 [9:48:35<20:12:53, 3.41it/s] 33%|███▎ | 123313/371472 [9:48:35<19:48:03, 3.48it/s] 33%|███▎ | 123314/371472 [9:48:36<19:46:36, 3.49it/s] 33%|███▎ | 123315/371472 [9:48:36<19:38:29, 3.51it/s] 33%|███▎ | 123316/371472 [9:48:36<19:45:13, 3.49it/s] 33%|███▎ | 123317/371472 [9:48:36<19:00:26, 3.63it/s] 33%|███▎ | 123318/371472 [9:48:37<21:24:11, 3.22it/s] 33%|███▎ | 123319/371472 [9:48:37<20:53:30, 3.30it/s] 33%|███▎ | 123320/371472 [9:48:37<20:06:24, 3.43it/s] {'loss': 3.248, 'learning_rate': 7.015449589519274e-07, 'epoch': 5.31} + 33%|███▎ | 123320/371472 [9:48:37<20:06:24, 3.43it/s] 33%|███▎ | 123321/371472 [9:48:38<21:00:40, 3.28it/s] 33%|███▎ | 123322/371472 [9:48:38<20:22:02, 3.38it/s] 33%|███▎ | 123323/371472 [9:48:38<19:37:42, 3.51it/s] 33%|███▎ | 123324/371472 [9:48:38<18:53:41, 3.65it/s] 33%|███▎ | 123325/371472 [9:48:39<18:23:40, 3.75it/s] 33%|███▎ | 123326/371472 [9:48:39<19:09:24, 3.60it/s] 33%|███▎ | 123327/371472 [9:48:39<19:30:32, 3.53it/s] 33%|███▎ | 123328/371472 [9:48:40<19:03:19, 3.62it/s] 33%|███▎ | 123329/371472 [9:48:40<18:31:57, 3.72it/s] 33%|███▎ | 123330/371472 [9:48:40<19:25:42, 3.55it/s] 33%|███▎ | 123331/371472 [9:48:40<20:12:53, 3.41it/s] 33%|███▎ | 123332/371472 [9:48:41<19:35:56, 3.52it/s] 33%|███▎ | 123333/371472 [9:48:41<19:17:07, 3.57it/s] 33%|███▎ | 123334/371472 [9:48:41<19:02:41, 3.62it/s] 33%|███▎ | 123335/371472 [9:48:42<19:04:13, 3.61it/s] 33%|███▎ | 123336/371472 [9:48:42<18:37:22, 3.70it/s] 33%|███▎ | 123337/371472 [9:48:42<18:25:06, 3.74it/s] 33%|███▎ | 123338/371472 [9:48:42<18:12:18, 3.79it/s] 33%|███▎ | 123339/371472 [9:48:43<20:17:36, 3.40it/s] 33%|███▎ | 123340/371472 [9:48:43<20:26:37, 3.37it/s] {'loss': 3.2058, 'learning_rate': 7.014964769764485e-07, 'epoch': 5.31} + 33%|███▎ | 123340/371472 [9:48:43<20:26:37, 3.37it/s] 33%|███▎ | 123341/371472 [9:48:43<21:53:53, 3.15it/s] 33%|███▎ | 123342/371472 [9:48:44<21:48:34, 3.16it/s] 33%|███▎ | 123343/371472 [9:48:44<20:48:13, 3.31it/s] 33%|███▎ | 123344/371472 [9:48:44<20:10:35, 3.42it/s] 33%|███▎ | 123345/371472 [9:48:44<19:41:23, 3.50it/s] 33%|███▎ | 123346/371472 [9:48:45<19:46:08, 3.49it/s] 33%|███▎ | 123347/371472 [9:48:45<21:22:18, 3.22it/s] 33%|███▎ | 123348/371472 [9:48:45<20:42:10, 3.33it/s] 33%|███▎ | 123349/371472 [9:48:46<21:09:10, 3.26it/s] 33%|███▎ | 123350/371472 [9:48:46<20:18:04, 3.40it/s] 33%|███▎ | 123351/371472 [9:48:46<18:59:40, 3.63it/s] 33%|███▎ | 123352/371472 [9:48:46<18:11:04, 3.79it/s] 33%|███▎ | 123353/371472 [9:48:47<19:07:40, 3.60it/s] 33%|███▎ | 123354/371472 [9:48:47<19:00:48, 3.62it/s] 33%|███▎ | 123355/371472 [9:48:47<19:07:27, 3.60it/s] 33%|███▎ | 123356/371472 [9:48:48<19:15:55, 3.58it/s] 33%|███▎ | 123357/371472 [9:48:48<18:15:29, 3.77it/s] 33%|███▎ | 123358/371472 [9:48:48<18:40:44, 3.69it/s] 33%|███▎ | 123359/371472 [9:48:48<19:54:34, 3.46it/s] 33%|███▎ | 123360/371472 [9:48:49<18:50:56, 3.66it/s] {'loss': 3.4605, 'learning_rate': 7.014479950009695e-07, 'epoch': 5.31} + 33%|███▎ | 123360/371472 [9:48:49<18:50:56, 3.66it/s] 33%|███▎ | 123361/371472 [9:48:49<18:45:24, 3.67it/s] 33%|███▎ | 123362/371472 [9:48:49<18:10:59, 3.79it/s] 33%|███▎ | 123363/371472 [9:48:49<18:32:27, 3.72it/s] 33%|███▎ | 123364/371472 [9:48:50<20:32:24, 3.36it/s] 33%|███▎ | 123365/371472 [9:48:50<20:59:39, 3.28it/s] 33%|███▎ | 123366/371472 [9:48:50<20:07:08, 3.43it/s] 33%|███▎ | 123367/371472 [9:48:51<20:40:02, 3.33it/s] 33%|███▎ | 123368/371472 [9:48:51<21:10:32, 3.25it/s] 33%|███▎ | 123369/371472 [9:48:51<20:09:56, 3.42it/s] 33%|███▎ | 123370/371472 [9:48:52<19:29:35, 3.54it/s] 33%|███▎ | 123371/371472 [9:48:52<18:51:20, 3.65it/s] 33%|███▎ | 123372/371472 [9:48:52<18:10:06, 3.79it/s] 33%|███▎ | 123373/371472 [9:48:52<17:43:50, 3.89it/s] 33%|███▎ | 123374/371472 [9:48:53<18:32:03, 3.72it/s] 33%|███▎ | 123375/371472 [9:48:53<18:38:29, 3.70it/s] 33%|███▎ | 123376/371472 [9:48:53<18:39:50, 3.69it/s] 33%|███▎ | 123377/371472 [9:48:53<19:03:04, 3.62it/s] 33%|███▎ | 123378/371472 [9:48:54<20:04:28, 3.43it/s] 33%|███▎ | 123379/371472 [9:48:54<20:06:54, 3.43it/s] 33%|███▎ | 123380/371472 [9:48:54<19:42:53, 3.50it/s] {'loss': 3.2334, 'learning_rate': 7.013995130254907e-07, 'epoch': 5.31} + 33%|███▎ | 123380/371472 [9:48:54<19:42:53, 3.50it/s] 33%|███▎ | 123381/371472 [9:48:55<19:23:26, 3.55it/s] 33%|███▎ | 123382/371472 [9:48:55<19:57:41, 3.45it/s] 33%|███▎ | 123383/371472 [9:48:55<19:25:53, 3.55it/s] 33%|███▎ | 123384/371472 [9:48:55<18:47:25, 3.67it/s] 33%|███▎ | 123385/371472 [9:48:56<20:43:21, 3.33it/s] 33%|███▎ | 123386/371472 [9:48:56<20:58:50, 3.28it/s] 33%|███▎ | 123387/371472 [9:48:56<21:32:50, 3.20it/s] 33%|███▎ | 123388/371472 [9:48:57<20:50:45, 3.31it/s] 33%|███▎ | 123389/371472 [9:48:57<20:09:36, 3.42it/s] 33%|███▎ | 123390/371472 [9:48:57<20:57:35, 3.29it/s] 33%|███▎ | 123391/371472 [9:48:58<21:32:33, 3.20it/s] 33%|███▎ | 123392/371472 [9:48:58<20:26:18, 3.37it/s] 33%|███▎ | 123393/371472 [9:48:58<19:57:41, 3.45it/s] 33%|███▎ | 123394/371472 [9:48:58<19:01:55, 3.62it/s] 33%|███▎ | 123395/371472 [9:48:59<18:16:08, 3.77it/s] 33%|███▎ | 123396/371472 [9:48:59<19:29:13, 3.54it/s] 33%|███▎ | 123397/371472 [9:48:59<19:10:09, 3.59it/s] 33%|███▎ | 123398/371472 [9:49:00<19:26:46, 3.54it/s] 33%|███▎ | 123399/371472 [9:49:00<18:59:41, 3.63it/s] 33%|███▎ | 123400/371472 [9:49:00<18:39:01, 3.69it/s] {'loss': 3.2318, 'learning_rate': 7.013510310500119e-07, 'epoch': 5.32} + 33%|███▎ | 123400/371472 [9:49:00<18:39:01, 3.69it/s] 33%|███▎ | 123401/371472 [9:49:00<19:55:05, 3.46it/s] 33%|███▎ | 123402/371472 [9:49:01<19:15:54, 3.58it/s] 33%|███▎ | 123403/371472 [9:49:01<20:05:17, 3.43it/s] 33%|███▎ | 123404/371472 [9:49:01<19:45:55, 3.49it/s] 33%|███▎ | 123405/371472 [9:49:02<20:06:30, 3.43it/s] 33%|███▎ | 123406/371472 [9:49:02<19:41:32, 3.50it/s] 33%|███▎ | 123407/371472 [9:49:02<22:07:48, 3.11it/s] 33%|███▎ | 123408/371472 [9:49:03<21:52:32, 3.15it/s] 33%|███▎ | 123409/371472 [9:49:03<21:34:29, 3.19it/s] 33%|███▎ | 123410/371472 [9:49:03<20:35:42, 3.35it/s] 33%|███▎ | 123411/371472 [9:49:03<20:22:46, 3.38it/s] 33%|███▎ | 123412/371472 [9:49:04<19:29:56, 3.53it/s] 33%|███▎ | 123413/371472 [9:49:04<20:33:55, 3.35it/s] 33%|███▎ | 123414/371472 [9:49:04<19:44:28, 3.49it/s] 33%|███▎ | 123415/371472 [9:49:04<18:37:56, 3.70it/s] 33%|███▎ | 123416/371472 [9:49:05<18:34:53, 3.71it/s] 33%|███▎ | 123417/371472 [9:49:05<19:00:17, 3.63it/s] 33%|███▎ | 123418/371472 [9:49:05<20:07:22, 3.42it/s] 33%|███▎ | 123419/371472 [9:49:06<19:26:50, 3.54it/s] 33%|███▎ | 123420/371472 [9:49:06<19:57:57, 3.45it/s] {'loss': 3.0309, 'learning_rate': 7.013025490745329e-07, 'epoch': 5.32} + 33%|███▎ | 123420/371472 [9:49:06<19:57:57, 3.45it/s] 33%|███▎ | 123421/371472 [9:49:06<20:22:54, 3.38it/s] 33%|███▎ | 123422/371472 [9:49:07<21:05:29, 3.27it/s] 33%|███▎ | 123423/371472 [9:49:07<20:25:51, 3.37it/s] 33%|███▎ | 123424/371472 [9:49:07<20:08:19, 3.42it/s] 33%|███▎ | 123425/371472 [9:49:07<19:52:31, 3.47it/s] 33%|███▎ | 123426/371472 [9:49:08<19:28:00, 3.54it/s] 33%|███▎ | 123427/371472 [9:49:08<19:15:05, 3.58it/s] 33%|███▎ | 123428/371472 [9:49:08<20:12:30, 3.41it/s] 33%|███▎ | 123429/371472 [9:49:09<19:58:59, 3.45it/s] 33%|███▎ | 123430/371472 [9:49:09<19:41:54, 3.50it/s] 33%|███▎ | 123431/371472 [9:49:09<19:00:30, 3.62it/s] 33%|███▎ | 123432/371472 [9:49:09<18:19:38, 3.76it/s] 33%|███▎ | 123433/371472 [9:49:10<18:06:00, 3.81it/s] 33%|███▎ | 123434/371472 [9:49:10<18:07:05, 3.80it/s] 33%|███▎ | 123435/371472 [9:49:10<17:56:56, 3.84it/s] 33%|███▎ | 123436/371472 [9:49:10<17:35:00, 3.92it/s] 33%|███▎ | 123437/371472 [9:49:11<17:58:30, 3.83it/s] 33%|███▎ | 123438/371472 [9:49:11<18:15:24, 3.77it/s] 33%|███▎ | 123439/371472 [9:49:11<18:05:59, 3.81it/s] 33%|███▎ | 123440/371472 [9:49:11<17:38:19, 3.91it/s] {'loss': 3.3005, 'learning_rate': 7.012540670990539e-07, 'epoch': 5.32} + 33%|███▎ | 123440/371472 [9:49:11<17:38:19, 3.91it/s] 33%|███▎ | 123441/371472 [9:49:12<17:01:42, 4.05it/s] 33%|███▎ | 123442/371472 [9:49:12<17:39:36, 3.90it/s] 33%|███▎ | 123443/371472 [9:49:12<17:47:06, 3.87it/s] 33%|███▎ | 123444/371472 [9:49:12<17:50:38, 3.86it/s] 33%|███▎ | 123445/371472 [9:49:13<17:57:36, 3.84it/s] 33%|███▎ | 123446/371472 [9:49:13<18:08:13, 3.80it/s] 33%|███▎ | 123447/371472 [9:49:13<18:14:21, 3.78it/s] 33%|███▎ | 123448/371472 [9:49:13<18:25:36, 3.74it/s] 33%|███▎ | 123449/371472 [9:49:14<17:44:14, 3.88it/s] 33%|███�� | 123450/371472 [9:49:14<18:10:37, 3.79it/s] 33%|███▎ | 123451/371472 [9:49:14<17:38:52, 3.90it/s] 33%|███▎ | 123452/371472 [9:49:15<19:35:47, 3.52it/s] 33%|███▎ | 123453/371472 [9:49:15<19:49:49, 3.47it/s] 33%|███▎ | 123454/371472 [9:49:15<19:49:22, 3.48it/s] 33%|███▎ | 123455/371472 [9:49:15<19:36:25, 3.51it/s] 33%|███▎ | 123456/371472 [9:49:16<18:57:25, 3.63it/s] 33%|███▎ | 123457/371472 [9:49:16<18:33:45, 3.71it/s] 33%|███▎ | 123458/371472 [9:49:16<18:50:47, 3.66it/s] 33%|███▎ | 123459/371472 [9:49:16<18:16:40, 3.77it/s] 33%|███▎ | 123460/371472 [9:49:17<18:33:13, 3.71it/s] {'loss': 3.2107, 'learning_rate': 7.012055851235751e-07, 'epoch': 5.32} + 33%|███▎ | 123460/371472 [9:49:17<18:33:13, 3.71it/s] 33%|███▎ | 123461/371472 [9:49:17<19:07:44, 3.60it/s] 33%|███▎ | 123462/371472 [9:49:17<20:54:34, 3.29it/s] 33%|███▎ | 123463/371472 [9:49:18<20:28:37, 3.36it/s] 33%|███▎ | 123464/371472 [9:49:18<21:03:17, 3.27it/s] 33%|███▎ | 123465/371472 [9:49:18<21:14:48, 3.24it/s] 33%|███▎ | 123466/371472 [9:49:19<20:20:54, 3.39it/s] 33%|███▎ | 123467/371472 [9:49:19<19:07:51, 3.60it/s] 33%|███▎ | 123468/371472 [9:49:19<18:36:26, 3.70it/s] 33%|███▎ | 123469/371472 [9:49:19<18:20:13, 3.76it/s] 33%|███▎ | 123470/371472 [9:49:20<18:04:09, 3.81it/s] 33%|███▎ | 123471/371472 [9:49:20<19:11:33, 3.59it/s] 33%|███▎ | 123472/371472 [9:49:20<18:58:33, 3.63it/s] 33%|███▎ | 123473/371472 [9:49:20<19:13:32, 3.58it/s] 33%|███▎ | 123474/371472 [9:49:21<18:20:32, 3.76it/s] 33%|███▎ | 123475/371472 [9:49:21<19:39:26, 3.50it/s] 33%|███▎ | 123476/371472 [9:49:21<19:29:49, 3.53it/s] 33%|███▎ | 123477/371472 [9:49:22<19:03:23, 3.61it/s] 33%|███▎ | 123478/371472 [9:49:22<18:38:50, 3.69it/s] 33%|███▎ | 123479/371472 [9:49:22<18:57:59, 3.63it/s] 33%|███▎ | 123480/371472 [9:49:22<19:06:28, 3.61it/s] {'loss': 3.2395, 'learning_rate': 7.011571031480963e-07, 'epoch': 5.32} + 33%|███▎ | 123480/371472 [9:49:22<19:06:28, 3.61it/s] 33%|███▎ | 123481/371472 [9:49:23<18:44:24, 3.68it/s] 33%|███▎ | 123482/371472 [9:49:23<18:17:27, 3.77it/s] 33%|███▎ | 123483/371472 [9:49:23<18:45:11, 3.67it/s] 33%|███▎ | 123484/371472 [9:49:24<19:54:47, 3.46it/s] 33%|███▎ | 123485/371472 [9:49:24<19:09:36, 3.60it/s] 33%|███▎ | 123486/371472 [9:49:24<19:42:04, 3.50it/s] 33%|███▎ | 123487/371472 [9:49:24<19:23:41, 3.55it/s] 33%|███▎ | 123488/371472 [9:49:25<18:18:18, 3.76it/s] 33%|███▎ | 123489/371472 [9:49:25<18:30:07, 3.72it/s] 33%|███▎ | 123490/371472 [9:49:25<18:42:45, 3.68it/s] 33%|███▎ | 123491/371472 [9:49:25<19:00:06, 3.63it/s] 33%|███▎ | 123492/371472 [9:49:26<19:05:49, 3.61it/s] 33%|███▎ | 123493/371472 [9:49:26<19:19:17, 3.57it/s] 33%|███▎ | 123494/371472 [9:49:26<18:28:47, 3.73it/s] 33%|███▎ | 123495/371472 [9:49:27<18:08:56, 3.80it/s] 33%|███▎ | 123496/371472 [9:49:27<18:19:49, 3.76it/s] 33%|███▎ | 123497/371472 [9:49:27<18:27:02, 3.73it/s] 33%|███▎ | 123498/371472 [9:49:27<18:02:44, 3.82it/s] 33%|███▎ | 123499/371472 [9:49:28<17:46:56, 3.87it/s] 33%|███▎ | 123500/371472 [9:49:28<17:20:39, 3.97it/s] {'loss': 3.3364, 'learning_rate': 7.011086211726174e-07, 'epoch': 5.32} + 33%|███▎ | 123500/371472 [9:49:28<17:20:39, 3.97it/s] 33%|███▎ | 123501/371472 [9:49:28<17:32:33, 3.93it/s] 33%|███▎ | 123502/371472 [9:49:28<19:05:10, 3.61it/s] 33%|███▎ | 123503/371472 [9:49:29<18:54:44, 3.64it/s] 33%|███▎ | 123504/371472 [9:49:29<19:06:21, 3.61it/s] 33%|███▎ | 123505/371472 [9:49:29<20:41:55, 3.33it/s] 33%|███▎ | 123506/371472 [9:49:30<19:34:49, 3.52it/s] 33%|███▎ | 123507/371472 [9:49:30<20:47:48, 3.31it/s] 33%|███▎ | 123508/371472 [9:49:30<20:02:29, 3.44it/s] 33%|███▎ | 123509/371472 [9:49:30<20:16:05, 3.40it/s] 33%|███▎ | 123510/371472 [9:49:31<21:15:16, 3.24it/s] 33%|███▎ | 123511/371472 [9:49:31<19:54:47, 3.46it/s] 33%|███▎ | 123512/371472 [9:49:31<19:30:46, 3.53it/s] 33%|███▎ | 123513/371472 [9:49:32<19:23:52, 3.55it/s] 33%|███▎ | 123514/371472 [9:49:32<19:52:59, 3.46it/s] 33%|███▎ | 123515/371472 [9:49:32<19:27:33, 3.54it/s] 33%|███▎ | 123516/371472 [9:49:32<19:09:57, 3.59it/s] 33%|███▎ | 123517/371472 [9:49:33<19:03:20, 3.61it/s] 33%|███▎ | 123518/371472 [9:49:33<18:55:27, 3.64it/s] 33%|███▎ | 123519/371472 [9:49:33<18:30:14, 3.72it/s] 33%|███▎ | 123520/371472 [9:49:33<17:59:31, 3.83it/s] {'loss': 3.3557, 'learning_rate': 7.010601391971384e-07, 'epoch': 5.32} + 33%|███▎ | 123520/371472 [9:49:33<17:59:31, 3.83it/s] 33%|███▎ | 123521/371472 [9:49:34<18:51:26, 3.65it/s] 33%|███▎ | 123522/371472 [9:49:34<19:58:16, 3.45it/s] 33%|███▎ | 123523/371472 [9:49:34<19:36:52, 3.51it/s] 33%|███▎ | 123524/371472 [9:49:35<19:29:38, 3.53it/s] 33%|███▎ | 123525/371472 [9:49:35<20:03:23, 3.43it/s] 33%|███▎ | 123526/371472 [9:49:35<19:00:39, 3.62it/s] 33%|███▎ | 123527/371472 [9:49:35<19:24:19, 3.55it/s] 33%|███▎ | 123528/371472 [9:49:36<18:52:39, 3.65it/s] 33%|███▎ | 123529/371472 [9:49:36<18:29:27, 3.72it/s] 33%|███▎ | 123530/371472 [9:49:36<18:58:20, 3.63it/s] 33%|███▎ | 123531/371472 [9:49:37<18:30:50, 3.72it/s] 33%|███▎ | 123532/371472 [9:49:37<19:38:52, 3.51it/s] 33%|███▎ | 123533/371472 [9:49:37<19:10:55, 3.59it/s] 33%|███▎ | 123534/371472 [9:49:37<18:32:58, 3.71it/s] 33%|███▎ | 123535/371472 [9:49:38<18:25:38, 3.74it/s] 33%|███▎ | 123536/371472 [9:49:38<18:31:07, 3.72it/s] 33%|███▎ | 123537/371472 [9:49:38<18:40:26, 3.69it/s] 33%|███▎ | 123538/371472 [9:49:38<18:32:06, 3.72it/s] 33%|███▎ | 123539/371472 [9:49:39<19:05:51, 3.61it/s] 33%|███▎ | 123540/371472 [9:49:39<18:40:25, 3.69it/s] {'loss': 3.2909, 'learning_rate': 7.010116572216596e-07, 'epoch': 5.32} + 33%|███▎ | 123540/371472 [9:49:39<18:40:25, 3.69it/s] 33%|███▎ | 123541/371472 [9:49:39<20:10:09, 3.41it/s] 33%|███▎ | 123542/371472 [9:49:40<19:05:27, 3.61it/s] 33%|███▎ | 123543/371472 [9:49:40<19:31:08, 3.53it/s] 33%|███▎ | 123544/371472 [9:49:40<19:58:33, 3.45it/s] 33%|███▎ | 123545/371472 [9:49:40<19:18:29, 3.57it/s] 33%|███▎ | 123546/371472 [9:49:41<19:53:45, 3.46it/s] 33%|███▎ | 123547/371472 [9:49:41<20:25:25, 3.37it/s] 33%|███▎ | 123548/371472 [9:49:41<20:08:34, 3.42it/s] 33%|███▎ | 123549/371472 [9:49:42<21:37:18, 3.19it/s] 33%|███▎ | 123550/371472 [9:49:42<21:02:37, 3.27it/s] 33%|███▎ | 123551/371472 [9:49:42<20:27:58, 3.36it/s] 33%|███▎ | 123552/371472 [9:49:43<19:24:04, 3.55it/s] 33%|███▎ | 123553/371472 [9:49:43<18:33:19, 3.71it/s] 33%|███▎ | 123554/371472 [9:49:43<18:47:18, 3.67it/s] 33%|███▎ | 123555/371472 [9:49:43<19:34:23, 3.52it/s] 33%|███▎ | 123556/371472 [9:49:44<19:10:30, 3.59it/s] 33%|███▎ | 123557/371472 [9:49:44<18:53:00, 3.65it/s] 33%|███▎ | 123558/371472 [9:49:44<19:37:28, 3.51it/s] 33%|███▎ | 123559/371472 [9:49:45<20:47:29, 3.31it/s] 33%|███▎ | 123560/371472 [9:49:45<19:55:05, 3.46it/s] {'loss': 3.1557, 'learning_rate': 7.009631752461807e-07, 'epoch': 5.32} + 33%|███▎ | 123560/371472 [9:49:45<19:55:05, 3.46it/s] 33%|███▎ | 123561/371472 [9:49:45<19:17:37, 3.57it/s] 33%|███▎ | 123562/371472 [9:49:45<19:08:26, 3.60it/s] 33%|███▎ | 123563/371472 [9:49:46<20:08:29, 3.42it/s] 33%|███▎ | 123564/371472 [9:49:46<23:20:19, 2.95it/s] 33%|███▎ | 123565/371472 [9:49:46<21:29:58, 3.20it/s] 33%|███▎ | 123566/371472 [9:49:47<21:26:52, 3.21it/s] 33%|███▎ | 123567/371472 [9:49:47<19:56:53, 3.45it/s] 33%|███▎ | 123568/371472 [9:49:47<20:04:29, 3.43it/s] 33%|███▎ | 123569/371472 [9:49:48<20:09:22, 3.42it/s] 33%|███▎ | 123570/371472 [9:49:48<19:10:15, 3.59it/s] 33%|███▎ | 123571/371472 [9:49:48<19:15:14, 3.58it/s] 33%|███▎ | 123572/371472 [9:49:48<19:42:48, 3.49it/s] 33%|███▎ | 123573/371472 [9:49:49<19:04:58, 3.61it/s] 33%|███▎ | 123574/371472 [9:49:49<19:56:21, 3.45it/s] 33%|███▎ | 123575/371472 [9:49:49<19:26:22, 3.54it/s] 33%|███▎ | 123576/371472 [9:49:49<19:06:30, 3.60it/s] 33%|███▎ | 123577/371472 [9:49:50<19:15:10, 3.58it/s] 33%|███▎ | 123578/371472 [9:49:50<18:50:03, 3.66it/s] 33%|███▎ | 123579/371472 [9:49:50<19:49:24, 3.47it/s] 33%|███▎ | 123580/371472 [9:49:51<19:24:17, 3.55it/s] {'loss': 3.1209, 'learning_rate': 7.009146932707017e-07, 'epoch': 5.32} + 33%|███▎ | 123580/371472 [9:49:51<19:24:17, 3.55it/s] 33%|███▎ | 123581/371472 [9:49:51<18:52:22, 3.65it/s] 33%|███▎ | 123582/371472 [9:49:51<19:51:20, 3.47it/s] 33%|███▎ | 123583/371472 [9:49:51<20:30:47, 3.36it/s] 33%|███▎ | 123584/371472 [9:49:52<20:28:58, 3.36it/s] 33%|███▎ | 123585/371472 [9:49:52<21:38:15, 3.18it/s] 33%|███▎ | 123586/371472 [9:49:52<20:37:19, 3.34it/s] 33%|███▎ | 123587/371472 [9:49:53<22:35:02, 3.05it/s] 33%|███▎ | 123588/371472 [9:49:53<22:11:51, 3.10it/s] 33%|███▎ | 123589/371472 [9:49:53<23:17:55, 2.96it/s] 33%|███▎ | 123590/371472 [9:49:54<21:31:49, 3.20it/s] 33%|███▎ | 123591/371472 [9:49:54<19:53:49, 3.46it/s] 33%|███▎ | 123592/371472 [9:49:54<19:45:07, 3.49it/s] 33%|███▎ | 123593/371472 [9:49:55<21:24:26, 3.22it/s] 33%|███▎ | 123594/371472 [9:49:55<22:28:15, 3.06it/s] 33%|███▎ | 123595/371472 [9:49:55<20:59:21, 3.28it/s] 33%|███▎ | 123596/371472 [9:49:55<20:14:47, 3.40it/s] 33%|███▎ | 123597/371472 [9:49:56<20:55:31, 3.29it/s] 33%|███▎ | 123598/371472 [9:49:56<20:06:36, 3.42it/s] 33%|███▎ | 123599/371472 [9:49:56<19:26:26, 3.54it/s] 33%|███▎ | 123600/371472 [9:49:57<19:04:09, 3.61it/s] {'loss': 3.2179, 'learning_rate': 7.008662112952228e-07, 'epoch': 5.32} + 33%|███▎ | 123600/371472 [9:49:57<19:04:09, 3.61it/s] 33%|███▎ | 123601/371472 [9:49:57<18:59:32, 3.63it/s] 33%|███▎ | 123602/371472 [9:49:57<18:48:06, 3.66it/s] 33%|███▎ | 123603/371472 [9:49:57<18:40:02, 3.69it/s] 33%|███▎ | 123604/371472 [9:49:58<19:08:31, 3.60it/s] 33%|███▎ | 123605/371472 [9:49:58<18:58:31, 3.63it/s] 33%|███▎ | 123606/371472 [9:49:58<19:41:58, 3.50it/s] 33%|███▎ | 123607/371472 [9:49:59<18:46:38, 3.67it/s] 33%|███▎ | 123608/371472 [9:49:59<19:27:17, 3.54it/s] 33%|███▎ | 123609/371472 [9:49:59<19:14:07, 3.58it/s] 33%|███▎ | 123610/371472 [9:49:59<18:21:03, 3.75it/s] 33%|███▎ | 123611/371472 [9:50:00<18:22:26, 3.75it/s] 33%|███▎ | 123612/371472 [9:50:00<18:27:56, 3.73it/s] 33%|███▎ | 123613/371472 [9:50:00<18:34:22, 3.71it/s] 33%|███▎ | 123614/371472 [9:50:00<18:17:48, 3.76it/s] 33%|███▎ | 123615/371472 [9:50:01<18:45:00, 3.67it/s] 33%|███▎ | 123616/371472 [9:50:01<18:59:21, 3.63it/s] 33%|███▎ | 123617/371472 [9:50:01<18:23:04, 3.74it/s] 33%|███▎ | 123618/371472 [9:50:02<18:47:13, 3.66it/s] 33%|███▎ | 123619/371472 [9:50:02<19:39:52, 3.50it/s] 33%|███▎ | 123620/371472 [9:50:02<18:55:01, 3.64it/s] {'loss': 3.227, 'learning_rate': 7.00817729319744e-07, 'epoch': 5.32} + 33%|███▎ | 123620/371472 [9:50:02<18:55:01, 3.64it/s] 33%|███▎ | 123621/371472 [9:50:02<18:19:45, 3.76it/s] 33%|███▎ | 123622/371472 [9:50:03<17:57:15, 3.83it/s] 33%|███▎ | 123623/371472 [9:50:03<19:08:51, 3.60it/s] 33%|███▎ | 123624/371472 [9:50:03<19:35:23, 3.51it/s] 33%|███▎ | 123625/371472 [9:50:03<19:12:42, 3.58it/s] 33%|███▎ | 123626/371472 [9:50:04<19:33:04, 3.52it/s] 33%|███▎ | 123627/371472 [9:50:04<18:33:10, 3.71it/s] 33%|███▎ | 123628/371472 [9:50:04<18:51:47, 3.65it/s] 33%|███▎ | 123629/371472 [9:50:05<19:10:44, 3.59it/s] 33%|███▎ | 123630/371472 [9:50:05<18:45:53, 3.67it/s] 33%|███▎ | 123631/371472 [9:50:05<18:52:32, 3.65it/s] 33%|███▎ | 123632/371472 [9:50:05<18:22:38, 3.75it/s] 33%|███▎ | 123633/371472 [9:50:06<18:37:14, 3.70it/s] 33%|███▎ | 123634/371472 [9:50:06<20:17:26, 3.39it/s] 33%|███▎ | 123635/371472 [9:50:06<20:21:18, 3.38it/s] 33%|███▎ | 123636/371472 [9:50:07<19:05:15, 3.61it/s] 33%|███▎ | 123637/371472 [9:50:07<21:02:15, 3.27it/s] 33%|███▎ | 123638/371472 [9:50:07<20:06:36, 3.42it/s] 33%|███▎ | 123639/371472 [9:50:07<19:41:19, 3.50it/s] 33%|███▎ | 123640/371472 [9:50:08<18:56:24, 3.63it/s] {'loss': 3.3198, 'learning_rate': 7.007692473442652e-07, 'epoch': 5.33} + 33%|███▎ | 123640/371472 [9:50:08<18:56:24, 3.63it/s] 33%|███▎ | 123641/371472 [9:50:08<18:34:44, 3.71it/s] 33%|███▎ | 123642/371472 [9:50:08<19:12:46, 3.58it/s] 33%|███▎ | 123643/371472 [9:50:08<18:31:40, 3.72it/s] 33%|███▎ | 123644/371472 [9:50:09<18:41:56, 3.68it/s] 33%|███▎ | 123645/371472 [9:50:09<18:58:29, 3.63it/s] 33%|███▎ | 123646/371472 [9:50:09<18:43:44, 3.68it/s] 33%|███▎ | 123647/371472 [9:50:10<18:11:18, 3.78it/s] 33%|███▎ | 123648/371472 [9:50:10<22:37:46, 3.04it/s] 33%|███▎ | 123649/371472 [9:50:10<21:18:27, 3.23it/s] 33%|███▎ | 123650/371472 [9:50:11<21:36:46, 3.19it/s] 33%|███▎ | 123651/371472 [9:50:11<20:27:50, 3.36it/s] 33%|███▎ | 123652/371472 [9:50:11<20:13:58, 3.40it/s] 33%|███▎ | 123653/371472 [9:50:11<19:38:52, 3.50it/s] 33%|███▎ | 123654/371472 [9:50:12<19:02:05, 3.62it/s] 33%|███▎ | 123655/371472 [9:50:12<18:43:01, 3.68it/s] 33%|███▎ | 123656/371472 [9:50:12<18:26:22, 3.73it/s] 33%|███▎ | 123657/371472 [9:50:12<17:57:52, 3.83it/s] 33%|███▎ | 123658/371472 [9:50:13<17:29:56, 3.93it/s] 33%|███▎ | 123659/371472 [9:50:13<16:59:15, 4.05it/s] 33%|███▎ | 123660/371472 [9:50:13<17:18:47, 3.98it/s] {'loss': 3.2398, 'learning_rate': 7.007207653687862e-07, 'epoch': 5.33} + 33%|███▎ | 123660/371472 [9:50:13<17:18:47, 3.98it/s] 33%|███▎ | 123661/371472 [9:50:13<17:25:39, 3.95it/s] 33%|███▎ | 123662/371472 [9:50:14<17:08:15, 4.02it/s] 33%|███▎ | 123663/371472 [9:50:14<18:38:10, 3.69it/s] 33%|███▎ | 123664/371472 [9:50:14<19:23:01, 3.55it/s] 33%|███▎ | 123665/371472 [9:50:15<18:53:31, 3.64it/s] 33%|███▎ | 123666/371472 [9:50:15<19:48:43, 3.47it/s] 33%|███▎ | 123667/371472 [9:50:15<19:52:50, 3.46it/s] 33%|███▎ | 123668/371472 [9:50:15<19:28:11, 3.54it/s] 33%|███▎ | 123669/371472 [9:50:16<18:49:24, 3.66it/s] 33%|███▎ | 123670/371472 [9:50:16<18:28:27, 3.73it/s] 33%|███▎ | 123671/371472 [9:50:16<18:42:42, 3.68it/s] 33%|███▎ | 123672/371472 [9:50:16<17:54:40, 3.84it/s] 33%|███▎ | 123673/371472 [9:50:17<18:15:25, 3.77it/s] 33%|███▎ | 123674/371472 [9:50:17<18:53:12, 3.64it/s] 33%|███▎ | 123675/371472 [9:50:17<19:41:44, 3.49it/s] 33%|███▎ | 123676/371472 [9:50:18<19:55:04, 3.46it/s] 33%|███▎ | 123677/371472 [9:50:18<19:54:33, 3.46it/s] 33%|███▎ | 123678/371472 [9:50:18<19:10:34, 3.59it/s] 33%|███▎ | 123679/371472 [9:50:19<21:09:52, 3.25it/s] 33%|███▎ | 123680/371472 [9:50:19<20:42:00, 3.33it/s] {'loss': 3.2511, 'learning_rate': 7.006722833933072e-07, 'epoch': 5.33} + 33%|███▎ | 123680/371472 [9:50:19<20:42:00, 3.33it/s] 33%|███▎ | 123681/371472 [9:50:19<20:12:40, 3.41it/s] 33%|███▎ | 123682/371472 [9:50:19<19:41:41, 3.49it/s] 33%|███▎ | 123683/371472 [9:50:20<19:06:51, 3.60it/s] 33%|███▎ | 123684/371472 [9:50:20<19:01:20, 3.62it/s] 33%|███▎ | 123685/371472 [9:50:20<19:01:37, 3.62it/s] 33%|███▎ | 123686/371472 [9:50:20<18:10:55, 3.79it/s] 33%|███▎ | 123687/371472 [9:50:21<19:39:00, 3.50it/s] 33%|███▎ | 123688/371472 [9:50:21<20:24:26, 3.37it/s] 33%|███▎ | 123689/371472 [9:50:21<22:33:15, 3.05it/s] 33%|███▎ | 123690/371472 [9:50:22<21:07:15, 3.26it/s] 33%|███▎ | 123691/371472 [9:50:22<20:15:15, 3.40it/s] 33%|███▎ | 123692/371472 [9:50:22<19:53:38, 3.46it/s] 33%|███▎ | 123693/371472 [9:50:23<19:21:17, 3.56it/s] 33%|███▎ | 123694/371472 [9:50:23<19:03:25, 3.61it/s] 33%|███▎ | 123695/371472 [9:50:23<18:48:21, 3.66it/s] 33%|███▎ | 123696/371472 [9:50:23<18:31:11, 3.72it/s] 33%|███▎ | 123697/371472 [9:50:24<18:51:34, 3.65it/s] 33%|███▎ | 123698/371472 [9:50:24<19:28:23, 3.53it/s] 33%|███▎ | 123699/371472 [9:50:24<20:33:13, 3.35it/s] 33%|███▎ | 123700/371472 [9:50:24<19:14:49, 3.58it/s] {'loss': 3.2067, 'learning_rate': 7.006238014178284e-07, 'epoch': 5.33} + 33%|███▎ | 123700/371472 [9:50:24<19:14:49, 3.58it/s] 33%|███▎ | 123701/371472 [9:50:25<19:36:27, 3.51it/s] 33%|███▎ | 123702/371472 [9:50:25<19:01:14, 3.62it/s] 33%|███▎ | 123703/371472 [9:50:25<18:43:16, 3.68it/s] 33%|███▎ | 123704/371472 [9:50:26<18:36:58, 3.70it/s] 33%|███▎ | 123705/371472 [9:50:26<18:40:18, 3.69it/s] 33%|███▎ | 123706/371472 [9:50:26<19:48:26, 3.47it/s] 33%|███▎ | 123707/371472 [9:50:26<20:18:04, 3.39it/s] 33%|███▎ | 123708/371472 [9:50:27<19:25:43, 3.54it/s] 33%|███▎ | 123709/371472 [9:50:27<18:54:57, 3.64it/s] 33%|███▎ | 123710/371472 [9:50:27<19:02:06, 3.62it/s] 33%|███▎ | 123711/371472 [9:50:28<18:47:24, 3.66it/s] 33%|███▎ | 123712/371472 [9:50:28<19:01:25, 3.62it/s] 33%|███▎ | 123713/371472 [9:50:28<19:39:54, 3.50it/s] 33%|███▎ | 123714/371472 [9:50:28<19:17:31, 3.57it/s] 33%|███▎ | 123715/371472 [9:50:29<19:36:15, 3.51it/s] 33%|███▎ | 123716/371472 [9:50:29<19:58:50, 3.44it/s] 33%|███▎ | 123717/371472 [9:50:29<19:24:36, 3.55it/s] 33%|███▎ | 123718/371472 [9:50:30<21:02:10, 3.27it/s] 33%|███▎ | 123719/371472 [9:50:30<21:01:51, 3.27it/s] 33%|███▎ | 123720/371472 [9:50:30<19:50:45, 3.47it/s] {'loss': 3.0998, 'learning_rate': 7.005753194423496e-07, 'epoch': 5.33} + 33%|███▎ | 123720/371472 [9:50:30<19:50:45, 3.47it/s] 33%|███▎ | 123721/371472 [9:50:30<19:54:11, 3.46it/s] 33%|███▎ | 123722/371472 [9:50:31<19:12:59, 3.58it/s] 33%|███▎ | 123723/371472 [9:50:31<19:18:37, 3.56it/s] 33%|███▎ | 123724/371472 [9:50:31<19:06:42, 3.60it/s] 33%|███▎ | 123725/371472 [9:50:32<19:21:03, 3.56it/s] 33%|███▎ | 123726/371472 [9:50:32<18:57:08, 3.63it/s] 33%|███▎ | 123727/371472 [9:50:32<18:55:02, 3.64it/s] 33%|███▎ | 123728/371472 [9:50:32<18:10:58, 3.78it/s] 33%|███▎ | 123729/371472 [9:50:33<17:49:12, 3.86it/s] 33%|███▎ | 123730/371472 [9:50:33<17:17:31, 3.98it/s] 33%|███▎ | 123731/371472 [9:50:33<17:41:05, 3.89it/s] 33%|███▎ | 123732/371472 [9:50:33<17:18:44, 3.98it/s] 33%|███▎ | 123733/371472 [9:50:34<17:53:24, 3.85it/s] 33%|███▎ | 123734/371472 [9:50:34<17:55:39, 3.84it/s] 33%|███▎ | 123735/371472 [9:50:34<17:30:31, 3.93it/s] 33%|███▎ | 123736/371472 [9:50:34<17:59:30, 3.82it/s] 33%|███▎ | 123737/371472 [9:50:35<17:20:16, 3.97it/s] 33%|███▎ | 123738/371472 [9:50:35<17:13:18, 4.00it/s] 33%|███▎ | 123739/371472 [9:50:35<17:55:00, 3.84it/s] 33%|███▎ | 123740/371472 [9:50:35<18:26:27, 3.73it/s] {'loss': 3.2079, 'learning_rate': 7.005268374668706e-07, 'epoch': 5.33} + 33%|███▎ | 123740/371472 [9:50:35<18:26:27, 3.73it/s] 33%|███▎ | 123741/371472 [9:50:36<18:33:40, 3.71it/s] 33%|███▎ | 123742/371472 [9:50:36<18:12:41, 3.78it/s] 33%|███▎ | 123743/371472 [9:50:36<17:57:58, 3.83it/s] 33%|███▎ | 123744/371472 [9:50:36<17:37:45, 3.90it/s] 33%|███▎ | 123745/371472 [9:50:37<17:25:29, 3.95it/s] 33%|███▎ | 123746/371472 [9:50:37<18:11:47, 3.78it/s] 33%|███▎ | 123747/371472 [9:50:37<20:20:15, 3.38it/s] 33%|███▎ | 123748/371472 [9:50:38<19:23:10, 3.55it/s] 33%|███▎ | 123749/371472 [9:50:38<18:56:44, 3.63it/s] 33%|███▎ | 123750/371472 [9:50:38<18:43:36, 3.67it/s] 33%|███▎ | 123751/371472 [9:50:38<18:47:54, 3.66it/s] 33%|███▎ | 123752/371472 [9:50:39<19:02:39, 3.61it/s] 33%|███▎ | 123753/371472 [9:50:39<18:26:18, 3.73it/s] 33%|███▎ | 123754/371472 [9:50:39<18:07:18, 3.80it/s] 33%|███▎ | 123755/371472 [9:50:39<18:42:54, 3.68it/s] 33%|███▎ | 123756/371472 [9:50:40<18:38:01, 3.69it/s] 33%|███▎ | 123757/371472 [9:50:40<18:51:39, 3.65it/s] 33%|███▎ | 123758/371472 [9:50:40<18:20:52, 3.75it/s] 33%|███▎ | 123759/371472 [9:50:41<19:05:03, 3.61it/s] 33%|███▎ | 123760/371472 [9:50:41<22:31:35, 3.05it/s] {'loss': 3.3591, 'learning_rate': 7.004783554913917e-07, 'epoch': 5.33} + 33%|███▎ | 123760/371472 [9:50:41<22:31:35, 3.05it/s] 33%|███▎ | 123761/371472 [9:50:41<21:37:37, 3.18it/s] 33%|███▎ | 123762/371472 [9:50:42<22:01:14, 3.12it/s] 33%|███▎ | 123763/371472 [9:50:42<20:58:10, 3.28it/s] 33%|███▎ | 123764/371472 [9:50:42<20:36:53, 3.34it/s] 33%|███▎ | 123765/371472 [9:50:42<19:42:07, 3.49it/s] 33%|███▎ | 123766/371472 [9:50:43<20:07:21, 3.42it/s] 33%|███▎ | 123767/371472 [9:50:43<19:49:43, 3.47it/s] 33%|███▎ | 123768/371472 [9:50:43<19:48:10, 3.47it/s] 33%|███▎ | 123769/371472 [9:50:44<19:39:12, 3.50it/s] 33%|███▎ | 123770/371472 [9:50:44<19:22:03, 3.55it/s] 33%|███▎ | 123771/371472 [9:50:44<18:52:40, 3.64it/s] 33%|███▎ | 123772/371472 [9:50:44<20:02:19, 3.43it/s] 33%|███▎ | 123773/371472 [9:50:45<19:18:56, 3.56it/s] 33%|███▎ | 123774/371472 [9:50:45<19:22:49, 3.55it/s] 33%|███▎ | 123775/371472 [9:50:45<18:54:29, 3.64it/s] 33%|███▎ | 123776/371472 [9:50:46<18:39:20, 3.69it/s] 33%|███▎ | 123777/371472 [9:50:46<19:19:44, 3.56it/s] 33%|███▎ | 123778/371472 [9:50:46<18:57:17, 3.63it/s] 33%|███▎ | 123779/371472 [9:50:46<18:54:24, 3.64it/s] 33%|███▎ | 123780/371472 [9:50:47<20:21:46, 3.38it/s] {'loss': 3.2563, 'learning_rate': 7.004298735159129e-07, 'epoch': 5.33} + 33%|███▎ | 123780/371472 [9:50:47<20:21:46, 3.38it/s] 33%|███▎ | 123781/371472 [9:50:47<19:31:09, 3.52it/s] 33%|███▎ | 123782/371472 [9:50:47<19:19:07, 3.56it/s] 33%|███▎ | 123783/371472 [9:50:48<18:39:47, 3.69it/s] 33%|███▎ | 123784/371472 [9:50:48<18:13:17, 3.78it/s] 33%|███▎ | 123785/371472 [9:50:48<19:16:49, 3.57it/s] 33%|███▎ | 123786/371472 [9:50:48<19:01:11, 3.62it/s] 33%|███▎ | 123787/371472 [9:50:49<18:17:10, 3.76it/s] 33%|███▎ | 123788/371472 [9:50:49<18:30:52, 3.72it/s] 33%|███▎ | 123789/371472 [9:50:49<17:56:42, 3.83it/s] 33%|███▎ | 123790/371472 [9:50:49<17:21:33, 3.96it/s] 33%|███▎ | 123791/371472 [9:50:50<18:08:02, 3.79it/s] 33%|███▎ | 123792/371472 [9:50:50<18:47:34, 3.66it/s] 33%|███▎ | 123793/371472 [9:50:50<18:31:05, 3.72it/s] 33%|███▎ | 123794/371472 [9:50:50<17:53:00, 3.85it/s] 33%|███▎ | 123795/371472 [9:50:51<18:29:19, 3.72it/s] 33%|███▎ | 123796/371472 [9:50:51<18:23:08, 3.74it/s] 33%|███▎ | 123797/371472 [9:50:51<18:46:46, 3.66it/s] 33%|███▎ | 123798/371472 [9:50:52<19:12:55, 3.58it/s] 33%|███▎ | 123799/371472 [9:50:52<19:14:53, 3.57it/s] 33%|███▎ | 123800/371472 [9:50:52<18:23:26, 3.74it/s] {'loss': 3.3802, 'learning_rate': 7.00381391540434e-07, 'epoch': 5.33} + 33%|███▎ | 123800/371472 [9:50:52<18:23:26, 3.74it/s] 33%|███▎ | 123801/371472 [9:50:52<18:14:13, 3.77it/s] 33%|███▎ | 123802/371472 [9:50:53<19:04:43, 3.61it/s] 33%|███▎ | 123803/371472 [9:50:53<18:45:17, 3.67it/s] 33%|███▎ | 123804/371472 [9:50:53<18:15:09, 3.77it/s] 33%|███▎ | 123805/371472 [9:50:53<18:20:44, 3.75it/s] 33%|███▎ | 123806/371472 [9:50:54<19:38:40, 3.50it/s] 33%|███▎ | 123807/371472 [9:50:54<19:44:00, 3.49it/s] 33%|███▎ | 123808/371472 [9:50:54<19:22:11, 3.55it/s] 33%|███▎ | 123809/371472 [9:50:55<20:18:24, 3.39it/s] 33%|███▎ | 123810/371472 [9:50:55<20:12:31, 3.40it/s] 33%|███▎ | 123811/371472 [9:50:55<19:16:45, 3.57it/s] 33%|███▎ | 123812/371472 [9:50:55<18:23:26, 3.74it/s] 33%|███▎ | 123813/371472 [9:50:56<18:34:22, 3.70it/s] 33%|███▎ | 123814/371472 [9:50:56<18:04:43, 3.81it/s] 33%|███▎ | 123815/371472 [9:50:56<18:11:00, 3.78it/s] 33%|███▎ | 123816/371472 [9:50:56<18:13:06, 3.78it/s] 33%|███▎ | 123817/371472 [9:50:57<17:49:35, 3.86it/s] 33%|███▎ | 123818/371472 [9:50:57<17:28:04, 3.94it/s] 33%|███▎ | 123819/371472 [9:50:57<17:14:48, 3.99it/s] 33%|███▎ | 123820/371472 [9:50:57<17:18:01, 3.98it/s] {'loss': 3.1963, 'learning_rate': 7.00332909564955e-07, 'epoch': 5.33} + 33%|███▎ | 123820/371472 [9:50:57<17:18:01, 3.98it/s] 33%|███▎ | 123821/371472 [9:50:58<17:55:03, 3.84it/s] 33%|███▎ | 123822/371472 [9:50:58<18:59:28, 3.62it/s] 33%|███▎ | 123823/371472 [9:50:58<19:10:07, 3.59it/s] 33%|███▎ | 123824/371472 [9:50:59<19:54:35, 3.46it/s] 33%|███▎ | 123825/371472 [9:50:59<19:16:22, 3.57it/s] 33%|███▎ | 123826/371472 [9:50:59<19:25:04, 3.54it/s] 33%|███▎ | 123827/371472 [9:50:59<18:34:05, 3.70it/s] 33%|███▎ | 123828/371472 [9:51:00<18:53:57, 3.64it/s] 33%|███▎ | 123829/371472 [9:51:00<18:22:20, 3.74it/s] 33%|███▎ | 123830/371472 [9:51:00<20:32:26, 3.35it/s] 33%|███▎ | 123831/371472 [9:51:01<20:42:58, 3.32it/s] 33%|███▎ | 123832/371472 [9:51:01<19:51:38, 3.46it/s] 33%|███▎ | 123833/371472 [9:51:01<19:14:46, 3.57it/s] 33%|███▎ | 123834/371472 [9:51:01<18:11:38, 3.78it/s] 33%|███▎ | 123835/371472 [9:51:02<18:10:33, 3.78it/s] 33%|███▎ | 123836/371472 [9:51:02<18:26:18, 3.73it/s] 33%|███▎ | 123837/371472 [9:51:02<18:33:32, 3.71it/s] 33%|███▎ | 123838/371472 [9:51:02<18:36:56, 3.70it/s] 33%|███▎ | 123839/371472 [9:51:03<18:25:08, 3.73it/s] 33%|███▎ | 123840/371472 [9:51:03<18:29:22, 3.72it/s] {'loss': 3.2951, 'learning_rate': 7.002844275894761e-07, 'epoch': 5.33} + 33%|███▎ | 123840/371472 [9:51:03<18:29:22, 3.72it/s] 33%|███▎ | 123841/371472 [9:51:03<18:23:23, 3.74it/s] 33%|███▎ | 123842/371472 [9:51:04<18:13:19, 3.77it/s] 33%|███▎ | 123843/371472 [9:51:04<18:20:49, 3.75it/s] 33%|███▎ | 123844/371472 [9:51:04<18:39:57, 3.69it/s] 33%|███▎ | 123845/371472 [9:51:04<18:15:08, 3.77it/s] 33%|███▎ | 123846/371472 [9:51:05<18:42:51, 3.68it/s] 33%|███▎ | 123847/371472 [9:51:05<19:10:53, 3.59it/s] 33%|███▎ | 123848/371472 [9:51:05<19:09:10, 3.59it/s] 33%|███▎ | 123849/371472 [9:51:05<18:18:08, 3.76it/s] 33%|███▎ | 123850/371472 [9:51:06<17:53:27, 3.84it/s] 33%|███▎ | 123851/371472 [9:51:06<18:13:09, 3.78it/s] 33%|███▎ | 123852/371472 [9:51:06<18:37:49, 3.69it/s] 33%|███▎ | 123853/371472 [9:51:06<17:54:50, 3.84it/s] 33%|███▎ | 123854/371472 [9:51:07<17:55:37, 3.84it/s] 33%|███▎ | 123855/371472 [9:51:07<17:43:25, 3.88it/s] 33%|███▎ | 123856/371472 [9:51:07<19:25:27, 3.54it/s] 33%|███▎ | 123857/371472 [9:51:08<19:01:06, 3.62it/s] 33%|███▎ | 123858/371472 [9:51:08<19:45:59, 3.48it/s] 33%|███▎ | 123859/371472 [9:51:08<19:23:40, 3.55it/s] 33%|███▎ | 123860/371472 [9:51:08<18:47:20, 3.66it/s] {'loss': 3.3228, 'learning_rate': 7.002359456139973e-07, 'epoch': 5.33} + 33%|███▎ | 123860/371472 [9:51:08<18:47:20, 3.66it/s] 33%|███▎ | 123861/371472 [9:51:09<19:02:11, 3.61it/s] 33%|███▎ | 123862/371472 [9:51:09<21:01:15, 3.27it/s] 33%|███▎ | 123863/371472 [9:51:09<21:57:46, 3.13it/s] 33%|███▎ | 123864/371472 [9:51:10<20:41:40, 3.32it/s] 33%|███▎ | 123865/371472 [9:51:10<19:44:58, 3.48it/s] 33%|███▎ | 123866/371472 [9:51:10<20:18:32, 3.39it/s] 33%|███▎ | 123867/371472 [9:51:11<19:36:53, 3.51it/s] 33%|███▎ | 123868/371472 [9:51:11<21:25:43, 3.21it/s] 33%|███▎ | 123869/371472 [9:51:11<21:13:02, 3.24it/s] 33%|███▎ | 123870/371472 [9:51:12<21:39:18, 3.18it/s] 33%|███▎ | 123871/371472 [9:51:12<21:41:52, 3.17it/s] 33%|███▎ | 123872/371472 [9:51:12<21:12:14, 3.24it/s] 33%|███▎ | 123873/371472 [9:51:12<21:12:05, 3.24it/s] 33%|███▎ | 123874/371472 [9:51:13<21:49:10, 3.15it/s] 33%|███▎ | 123875/371472 [9:51:13<20:27:10, 3.36it/s] 33%|███▎ | 123876/371472 [9:51:13<21:56:27, 3.13it/s] 33%|███▎ | 123877/371472 [9:51:14<20:27:08, 3.36it/s] 33%|███▎ | 123878/371472 [9:51:14<20:24:57, 3.37it/s] 33%|███▎ | 123879/371472 [9:51:14<19:51:18, 3.46it/s] 33%|███▎ | 123880/371472 [9:51:14<19:16:26, 3.57it/s] {'loss': 3.2656, 'learning_rate': 7.001874636385184e-07, 'epoch': 5.34} + 33%|███▎ | 123880/371472 [9:51:14<19:16:26, 3.57it/s] 33%|███▎ | 123881/371472 [9:51:15<19:36:36, 3.51it/s] 33%|███▎ | 123882/371472 [9:51:15<20:24:24, 3.37it/s] 33%|███▎ | 123883/371472 [9:51:15<20:07:36, 3.42it/s] 33%|███▎ | 123884/371472 [9:51:16<19:21:32, 3.55it/s] 33%|███▎ | 123885/371472 [9:51:16<19:18:04, 3.56it/s] 33%|███▎ | 123886/371472 [9:51:16<18:59:02, 3.62it/s] 33%|███▎ | 123887/371472 [9:51:16<18:35:41, 3.70it/s] 33%|███▎ | 123888/371472 [9:51:17<18:31:52, 3.71it/s] 33%|███▎ | 123889/371472 [9:51:17<21:06:26, 3.26it/s] 33%|███▎ | 123890/371472 [9:51:17<21:27:08, 3.21it/s] 33%|███▎ | 123891/371472 [9:51:18<21:46:50, 3.16it/s] 33%|███▎ | 123892/371472 [9:51:18<20:50:49, 3.30it/s] 33%|███▎ | 123893/371472 [9:51:18<20:45:37, 3.31it/s] 33%|███▎ | 123894/371472 [9:51:19<20:13:18, 3.40it/s] 33%|███▎ | 123895/371472 [9:51:19<19:36:48, 3.51it/s] 33%|███▎ | 123896/371472 [9:51:19<19:03:37, 3.61it/s] 33%|███▎ | 123897/371472 [9:51:19<18:54:55, 3.64it/s] 33%|███▎ | 123898/371472 [9:51:20<18:35:02, 3.70it/s] 33%|███▎ | 123899/371472 [9:51:20<18:10:52, 3.78it/s] 33%|███▎ | 123900/371472 [9:51:20<18:21:53, 3.74it/s] {'loss': 3.1612, 'learning_rate': 7.001389816630394e-07, 'epoch': 5.34} + 33%|███▎ | 123900/371472 [9:51:20<18:21:53, 3.74it/s] 33%|███▎ | 123901/371472 [9:51:20<18:36:56, 3.69it/s] 33%|███▎ | 123902/371472 [9:51:21<19:10:04, 3.59it/s] 33%|███▎ | 123903/371472 [9:51:21<19:16:32, 3.57it/s] 33%|███▎ | 123904/371472 [9:51:21<20:24:14, 3.37it/s] 33%|███▎ | 123905/371472 [9:51:22<21:04:40, 3.26it/s] 33%|███▎ | 123906/371472 [9:51:22<20:32:09, 3.35it/s] 33%|███▎ | 123907/371472 [9:51:22<19:09:13, 3.59it/s] 33%|███▎ | 123908/371472 [9:51:22<18:39:47, 3.68it/s] 33%|███▎ | 123909/371472 [9:51:23<18:28:09, 3.72it/s] 33%|███▎ | 123910/371472 [9:51:23<18:40:03, 3.68it/s] 33%|███▎ | 123911/371472 [9:51:23<18:36:22, 3.70it/s] 33%|███▎ | 123912/371472 [9:51:24<19:23:09, 3.55it/s] 33%|███▎ | 123913/371472 [9:51:24<18:39:00, 3.69it/s] 33%|███▎ | 123914/371472 [9:51:24<18:27:18, 3.73it/s] 33%|███▎ | 123915/371472 [9:51:24<18:33:21, 3.71it/s] 33%|███▎ | 123916/371472 [9:51:25<19:46:37, 3.48it/s] 33%|███▎ | 123917/371472 [9:51:25<19:56:43, 3.45it/s] 33%|███▎ | 123918/371472 [9:51:25<19:24:33, 3.54it/s] 33%|███▎ | 123919/371472 [9:51:26<18:47:09, 3.66it/s] 33%|███▎ | 123920/371472 [9:51:26<18:15:49, 3.77it/s] {'loss': 3.1975, 'learning_rate': 7.000904996875606e-07, 'epoch': 5.34} + 33%|███▎ | 123920/371472 [9:51:26<18:15:49, 3.77it/s] 33%|███▎ | 123921/371472 [9:51:26<19:25:56, 3.54it/s] 33%|███▎ | 123922/371472 [9:51:26<18:53:56, 3.64it/s] 33%|███▎ | 123923/371472 [9:51:27<18:50:36, 3.65it/s] 33%|███▎ | 123924/371472 [9:51:27<18:32:38, 3.71it/s] 33%|███▎ | 123925/371472 [9:51:27<18:26:47, 3.73it/s] 33%|███▎ | 123926/371472 [9:51:27<18:14:16, 3.77it/s] 33%|███▎ | 123927/371472 [9:51:28<18:40:49, 3.68it/s] 33%|███▎ | 123928/371472 [9:51:28<18:41:31, 3.68it/s] 33%|███▎ | 123929/371472 [9:51:28<18:45:36, 3.67it/s] 33%|███▎ | 123930/371472 [9:51:28<18:06:05, 3.80it/s] 33%|███▎ | 123931/371472 [9:51:29<17:48:10, 3.86it/s] 33%|███▎ | 123932/371472 [9:51:29<18:04:50, 3.80it/s] 33%|███▎ | 123933/371472 [9:51:29<20:15:38, 3.39it/s] 33%|███▎ | 123934/371472 [9:51:30<20:35:33, 3.34it/s] 33%|███▎ | 123935/371472 [9:51:30<20:52:15, 3.29it/s] 33%|███▎ | 123936/371472 [9:51:30<20:17:13, 3.39it/s] 33%|███▎ | 123937/371472 [9:51:31<19:28:39, 3.53it/s] 33%|███▎ | 123938/371472 [9:51:31<19:12:11, 3.58it/s] 33%|███▎ | 123939/371472 [9:51:31<18:30:51, 3.71it/s] 33%|███▎ | 123940/371472 [9:51:31<19:31:24, 3.52it/s] {'loss': 3.2904, 'learning_rate': 7.000420177120817e-07, 'epoch': 5.34} + 33%|███▎ | 123940/371472 [9:51:31<19:31:24, 3.52it/s] 33%|███▎ | 123941/371472 [9:51:32<19:00:34, 3.62it/s] 33%|███▎ | 123942/371472 [9:51:32<20:06:35, 3.42it/s] 33%|███▎ | 123943/371472 [9:51:32<20:00:35, 3.44it/s] 33%|███▎ | 123944/371472 [9:51:32<19:14:56, 3.57it/s] 33%|███▎ | 123945/371472 [9:51:33<18:25:22, 3.73it/s] 33%|███▎ | 123946/371472 [9:51:33<19:21:06, 3.55it/s] 33%|███▎ | 123947/371472 [9:51:33<18:55:41, 3.63it/s] 33%|███▎ | 123948/371472 [9:51:34<18:54:04, 3.64it/s] 33%|███▎ | 123949/371472 [9:51:34<19:43:36, 3.49it/s] 33%|███▎ | 123950/371472 [9:51:34<18:55:35, 3.63it/s] 33%|███▎ | 123951/371472 [9:51:34<19:20:39, 3.55it/s] 33%|███▎ | 123952/371472 [9:51:35<19:35:03, 3.51it/s] 33%|███▎ | 123953/371472 [9:51:35<19:16:54, 3.57it/s] 33%|███▎ | 123954/371472 [9:51:35<20:08:51, 3.41it/s] 33%|███▎ | 123955/371472 [9:51:36<19:00:41, 3.62it/s] 33%|███▎ | 123956/371472 [9:51:36<19:56:57, 3.45it/s] 33%|███▎ | 123957/371472 [9:51:36<19:14:36, 3.57it/s] 33%|███▎ | 123958/371472 [9:51:36<19:15:18, 3.57it/s] 33%|███▎ | 123959/371472 [9:51:37<19:05:32, 3.60it/s] 33%|███▎ | 123960/371472 [9:51:37<20:15:34, 3.39it/s] {'loss': 3.1987, 'learning_rate': 6.999935357366027e-07, 'epoch': 5.34} + 33%|███▎ | 123960/371472 [9:51:37<20:15:34, 3.39it/s] 33%|███▎ | 123961/371472 [9:51:37<19:37:52, 3.50it/s] 33%|███▎ | 123962/371472 [9:51:38<19:32:57, 3.52it/s] 33%|███▎ | 123963/371472 [9:51:38<18:49:17, 3.65it/s] 33%|███▎ | 123964/371472 [9:51:38<18:53:34, 3.64it/s] 33%|███▎ | 123965/371472 [9:51:38<19:30:03, 3.53it/s] 33%|███▎ | 123966/371472 [9:51:39<19:45:42, 3.48it/s] 33%|███▎ | 123967/371472 [9:51:39<19:01:33, 3.61it/s] 33%|███▎ | 123968/371472 [9:51:39<20:13:50, 3.40it/s] 33%|███▎ | 123969/371472 [9:51:40<20:16:06, 3.39it/s] 33%|███▎ | 123970/371472 [9:51:40<20:35:28, 3.34it/s] 33%|███▎ | 123971/371472 [9:51:40<20:45:00, 3.31it/s] 33%|███▎ | 123972/371472 [9:51:40<20:33:32, 3.34it/s] 33%|███▎ | 123973/371472 [9:51:41<19:30:28, 3.52it/s] 33%|███▎ | 123974/371472 [9:51:41<18:56:33, 3.63it/s] 33%|███▎ | 123975/371472 [9:51:41<19:13:29, 3.58it/s] 33%|███▎ | 123976/371472 [9:51:41<18:18:06, 3.76it/s] 33%|███▎ | 123977/371472 [9:51:42<18:23:15, 3.74it/s] 33%|███▎ | 123978/371472 [9:51:42<18:45:45, 3.66it/s] 33%|███▎ | 123979/371472 [9:51:42<20:18:07, 3.39it/s] 33%|███▎ | 123980/371472 [9:51:43<21:35:12, 3.18it/s] {'loss': 3.1677, 'learning_rate': 6.999450537611238e-07, 'epoch': 5.34} + 33%|███▎ | 123980/371472 [9:51:43<21:35:12, 3.18it/s] 33%|███▎ | 123981/371472 [9:51:43<21:59:12, 3.13it/s] 33%|███▎ | 123982/371472 [9:51:43<21:16:21, 3.23it/s] 33%|███▎ | 123983/371472 [9:51:44<20:35:16, 3.34it/s] 33%|███▎ | 123984/371472 [9:51:44<19:31:27, 3.52it/s] 33%|███▎ | 123985/371472 [9:51:44<19:43:55, 3.48it/s] 33%|███▎ | 123986/371472 [9:51:44<19:55:49, 3.45it/s] 33%|███▎ | 123987/371472 [9:51:45<19:54:56, 3.45it/s] 33%|███▎ | 123988/371472 [9:51:45<19:01:36, 3.61it/s] 33%|███▎ | 123989/371472 [9:51:45<18:51:16, 3.65it/s] 33%|███▎ | 123990/371472 [9:51:46<18:57:46, 3.63it/s] 33%|███▎ | 123991/371472 [9:51:46<18:44:11, 3.67it/s] 33%|███▎ | 123992/371472 [9:51:46<18:17:58, 3.76it/s] 33%|███▎ | 123993/371472 [9:51:46<18:23:51, 3.74it/s] 33%|███▎ | 123994/371472 [9:51:47<18:37:15, 3.69it/s] 33%|███▎ | 123995/371472 [9:51:47<19:00:29, 3.62it/s] 33%|███▎ | 123996/371472 [9:51:47<20:04:56, 3.42it/s] 33%|███▎ | 123997/371472 [9:51:48<20:46:09, 3.31it/s] 33%|███▎ | 123998/371472 [9:51:48<19:41:24, 3.49it/s] 33%|███▎ | 123999/371472 [9:51:48<19:08:27, 3.59it/s] 33%|███▎ | 124000/371472 [9:51:48<18:46:33, 3.66it/s] {'loss': 3.1516, 'learning_rate': 6.99896571785645e-07, 'epoch': 5.34} + 33%|███▎ | 124000/371472 [9:51:48<18:46:33, 3.66it/s] 33%|███▎ | 124001/371472 [9:51:49<20:34:07, 3.34it/s] 33%|███▎ | 124002/371472 [9:51:49<20:56:12, 3.28it/s] 33%|███▎ | 124003/371472 [9:51:49<20:03:47, 3.43it/s] 33%|███▎ | 124004/371472 [9:51:50<19:33:04, 3.52it/s] 33%|███▎ | 124005/371472 [9:51:50<18:26:44, 3.73it/s] 33%|███▎ | 124006/371472 [9:51:50<17:57:36, 3.83it/s] 33%|███▎ | 124007/371472 [9:51:50<20:11:46, 3.40it/s] 33%|███▎ | 124008/371472 [9:51:51<19:18:57, 3.56it/s] 33%|███▎ | 124009/371472 [9:51:51<19:47:11, 3.47it/s] 33%|███▎ | 124010/371472 [9:51:51<19:24:45, 3.54it/s] 33%|███▎ | 124011/371472 [9:51:52<19:10:11, 3.59it/s] 33%|███▎ | 124012/371472 [9:51:52<19:00:14, 3.62it/s] 33%|███▎ | 124013/371472 [9:51:52<19:26:16, 3.54it/s] 33%|███▎ | 124014/371472 [9:51:52<18:56:39, 3.63it/s] 33%|███▎ | 124015/371472 [9:51:53<18:27:06, 3.73it/s] 33%|███▎ | 124016/371472 [9:51:53<18:12:54, 3.77it/s] 33%|███▎ | 124017/371472 [9:51:53<19:46:39, 3.48it/s] 33%|███▎ | 124018/371472 [9:51:53<19:51:21, 3.46it/s] 33%|███▎ | 124019/371472 [9:51:54<19:44:44, 3.48it/s] 33%|███▎ | 124020/371472 [9:51:54<18:56:08, 3.63it/s] {'loss': 3.0876, 'learning_rate': 6.998480898101662e-07, 'epoch': 5.34} + 33%|███▎ | 124020/371472 [9:51:54<18:56:08, 3.63it/s] 33%|███▎ | 124021/371472 [9:51:54<18:20:57, 3.75it/s] 33%|███▎ | 124022/371472 [9:51:55<19:02:04, 3.61it/s] 33%|███▎ | 124023/371472 [9:51:55<18:35:54, 3.70it/s] 33%|███▎ | 124024/371472 [9:51:55<17:58:45, 3.82it/s] 33%|███▎ | 124025/371472 [9:51:55<17:32:18, 3.92it/s] 33%|███▎ | 124026/371472 [9:51:56<18:34:48, 3.70it/s] 33%|███▎ | 124027/371472 [9:51:56<19:05:19, 3.60it/s] 33%|███▎ | 124028/371472 [9:51:56<19:06:31, 3.60it/s] 33%|███▎ | 124029/371472 [9:51:56<18:27:52, 3.72it/s] 33%|███▎ | 124030/371472 [9:51:57<19:45:52, 3.48it/s] 33%|███▎ | 124031/371472 [9:51:57<19:34:45, 3.51it/s] 33%|███▎ | 124032/371472 [9:51:57<18:56:26, 3.63it/s] 33%|███▎ | 124033/371472 [9:51:58<18:51:51, 3.64it/s] 33%|███▎ | 124034/371472 [9:51:58<18:51:35, 3.64it/s] 33%|███▎ | 124035/371472 [9:51:58<18:51:46, 3.64it/s] 33%|███▎ | 124036/371472 [9:51:58<20:42:50, 3.32it/s] 33%|███▎ | 124037/371472 [9:51:59<19:54:13, 3.45it/s] 33%|███▎ | 124038/371472 [9:51:59<19:48:39, 3.47it/s] 33%|███▎ | 124039/371472 [9:51:59<19:40:22, 3.49it/s] 33%|███▎ | 124040/371472 [9:52:00<19:38:39, 3.50it/s] {'loss': 3.3544, 'learning_rate': 6.997996078346872e-07, 'epoch': 5.34} + 33%|███▎ | 124040/371472 [9:52:00<19:38:39, 3.50it/s] 33%|███▎ | 124041/371472 [9:52:00<18:51:20, 3.65it/s] 33%|███▎ | 124042/371472 [9:52:00<18:52:23, 3.64it/s] 33%|███▎ | 124043/371472 [9:52:00<18:56:32, 3.63it/s] 33%|███▎ | 124044/371472 [9:52:01<18:17:54, 3.76it/s] 33%|███▎ | 124045/371472 [9:52:01<19:24:04, 3.54it/s] 33%|███▎ | 124046/371472 [9:52:01<18:55:47, 3.63it/s] 33%|███▎ | 124047/371472 [9:52:01<19:12:24, 3.58it/s] 33%|███▎ | 124048/371472 [9:52:02<18:45:16, 3.66it/s] 33%|███▎ | 124049/371472 [9:52:02<19:52:55, 3.46it/s] 33%|███▎ | 124050/371472 [9:52:02<19:18:15, 3.56it/s] 33%|███▎ | 124051/371472 [9:52:03<18:51:41, 3.64it/s] 33%|███▎ | 124052/371472 [9:52:03<18:17:28, 3.76it/s] 33%|███▎ | 124053/371472 [9:52:03<18:45:42, 3.66it/s] 33%|███▎ | 124054/371472 [9:52:03<18:55:01, 3.63it/s] 33%|███▎ | 124055/371472 [9:52:04<18:43:20, 3.67it/s] 33%|███▎ | 124056/371472 [9:52:04<18:37:19, 3.69it/s] 33%|███▎ | 124057/371472 [9:52:04<19:10:09, 3.59it/s] 33%|███▎ | 124058/371472 [9:52:05<20:19:49, 3.38it/s] 33%|███▎ | 124059/371472 [9:52:05<19:14:02, 3.57it/s] 33%|███▎ | 124060/371472 [9:52:05<19:08:32, 3.59it/s] {'loss': 3.1773, 'learning_rate': 6.997511258592082e-07, 'epoch': 5.34} + 33%|███▎ | 124060/371472 [9:52:05<19:08:32, 3.59it/s] 33%|███▎ | 124061/371472 [9:52:05<19:13:50, 3.57it/s] 33%|███▎ | 124062/371472 [9:52:06<19:34:11, 3.51it/s] 33%|███▎ | 124063/371472 [9:52:06<19:10:02, 3.59it/s] 33%|███▎ | 124064/371472 [9:52:06<18:24:59, 3.73it/s] 33%|███▎ | 124065/371472 [9:52:06<19:15:04, 3.57it/s] 33%|███▎ | 124066/371472 [9:52:07<18:36:32, 3.69it/s] 33%|███▎ | 124067/371472 [9:52:07<19:09:21, 3.59it/s] 33%|███▎ | 124068/371472 [9:52:07<19:04:47, 3.60it/s] 33%|███▎ | 124069/371472 [9:52:08<20:09:03, 3.41it/s] 33%|███▎ | 124070/371472 [9:52:08<20:46:00, 3.31it/s] 33%|███▎ | 124071/371472 [9:52:08<20:22:46, 3.37it/s] 33%|███▎ | 124072/371472 [9:52:09<21:13:33, 3.24it/s] 33%|███▎ | 124073/371472 [9:52:09<20:58:26, 3.28it/s] 33%|███▎ | 124074/371472 [9:52:09<20:26:01, 3.36it/s] 33%|███▎ | 124075/371472 [9:52:09<20:15:50, 3.39it/s] 33%|███▎ | 124076/371472 [9:52:10<19:06:05, 3.60it/s] 33%|███▎ | 124077/371472 [9:52:10<19:58:30, 3.44it/s] 33%|███▎ | 124078/371472 [9:52:10<21:49:47, 3.15it/s] 33%|███▎ | 124079/371472 [9:52:11<20:40:59, 3.32it/s] 33%|███▎ | 124080/371472 [9:52:11<19:36:25, 3.50it/s] {'loss': 3.1361, 'learning_rate': 6.997026438837294e-07, 'epoch': 5.34} + 33%|███▎ | 124080/371472 [9:52:11<19:36:25, 3.50it/s] 33%|███▎ | 124081/371472 [9:52:11<19:34:26, 3.51it/s] 33%|███▎ | 124082/371472 [9:52:11<19:33:19, 3.51it/s] 33%|███▎ | 124083/371472 [9:52:12<20:47:53, 3.30it/s] 33%|███▎ | 124084/371472 [9:52:12<22:05:57, 3.11it/s] 33%|███▎ | 124085/371472 [9:52:12<21:14:41, 3.23it/s] 33%|███▎ | 124086/371472 [9:52:13<20:29:11, 3.35it/s] 33%|███▎ | 124087/371472 [9:52:13<20:30:13, 3.35it/s] 33%|███▎ | 124088/371472 [9:52:13<21:14:53, 3.23it/s] 33%|███▎ | 124089/371472 [9:52:14<20:12:37, 3.40it/s] 33%|███▎ | 124090/371472 [9:52:14<19:14:18, 3.57it/s] 33%|███▎ | 124091/371472 [9:52:14<18:45:07, 3.66it/s] 33%|███▎ | 124092/371472 [9:52:14<18:44:11, 3.67it/s] 33%|███▎ | 124093/371472 [9:52:15<18:41:26, 3.68it/s] 33%|███▎ | 124094/371472 [9:52:15<18:15:09, 3.76it/s] 33%|███▎ | 124095/371472 [9:52:15<18:30:39, 3.71it/s] 33%|███▎ | 124096/371472 [9:52:15<19:02:08, 3.61it/s] 33%|███▎ | 124097/371472 [9:52:16<19:32:42, 3.52it/s] 33%|███▎ | 124098/371472 [9:52:16<19:37:08, 3.50it/s] 33%|███▎ | 124099/371472 [9:52:16<19:31:02, 3.52it/s] 33%|███▎ | 124100/371472 [9:52:17<19:41:21, 3.49it/s] {'loss': 3.0797, 'learning_rate': 6.996541619082506e-07, 'epoch': 5.35} + 33%|███▎ | 124100/371472 [9:52:17<19:41:21, 3.49it/s] 33%|███▎ | 124101/371472 [9:52:17<19:43:39, 3.48it/s] 33%|███▎ | 124102/371472 [9:52:17<19:10:46, 3.58it/s] 33%|███▎ | 124103/371472 [9:52:18<19:51:20, 3.46it/s] 33%|███▎ | 124104/371472 [9:52:18<19:27:26, 3.53it/s] 33%|███▎ | 124105/371472 [9:52:18<18:51:16, 3.64it/s] 33%|███▎ | 124106/371472 [9:52:18<18:30:25, 3.71it/s] 33%|███▎ | 124107/371472 [9:52:19<18:52:00, 3.64it/s] 33%|███▎ | 124108/371472 [9:52:19<17:53:47, 3.84it/s] 33%|███▎ | 124109/371472 [9:52:19<17:53:52, 3.84it/s] 33%|███▎ | 124110/371472 [9:52:19<17:41:13, 3.88it/s] 33%|███▎ | 124111/371472 [9:52:20<19:11:51, 3.58it/s] 33%|███▎ | 124112/371472 [9:52:20<18:06:57, 3.79it/s] 33%|███▎ | 124113/371472 [9:52:20<17:44:03, 3.87it/s] 33%|███▎ | 124114/371472 [9:52:20<17:39:12, 3.89it/s] 33%|███▎ | 124115/371472 [9:52:21<17:34:55, 3.91it/s] 33%|███▎ | 124116/371472 [9:52:21<17:43:09, 3.88it/s] 33%|███▎ | 124117/371472 [9:52:21<17:38:22, 3.90it/s] 33%|███▎ | 124118/371472 [9:52:21<17:21:43, 3.96it/s] 33%|███▎ | 124119/371472 [9:52:22<18:38:53, 3.68it/s] 33%|███▎ | 124120/371472 [9:52:22<17:49:17, 3.86it/s] {'loss': 3.2646, 'learning_rate': 6.996056799327716e-07, 'epoch': 5.35} + 33%|███▎ | 124120/371472 [9:52:22<17:49:17, 3.86it/s] 33%|███▎ | 124121/371472 [9:52:22<19:15:34, 3.57it/s] 33%|███▎ | 124122/371472 [9:52:23<18:37:16, 3.69it/s] 33%|███▎ | 124123/371472 [9:52:23<20:53:35, 3.29it/s] 33%|███▎ | 124124/371472 [9:52:23<20:18:16, 3.38it/s] 33%|███▎ | 124125/371472 [9:52:23<19:34:08, 3.51it/s] 33%|███▎ | 124126/371472 [9:52:24<18:57:40, 3.62it/s] 33%|███▎ | 124127/371472 [9:52:24<19:03:02, 3.61it/s] 33%|███▎ | 124128/371472 [9:52:24<18:58:08, 3.62it/s] 33%|███▎ | 124129/371472 [9:52:25<19:01:31, 3.61it/s] 33%|███▎ | 124130/371472 [9:52:25<19:03:17, 3.61it/s] 33%|███▎ | 124131/371472 [9:52:25<18:25:11, 3.73it/s] 33%|███▎ | 124132/371472 [9:52:25<19:45:04, 3.48it/s] 33%|███▎ | 124133/371472 [9:52:26<19:43:50, 3.48it/s] 33%|███▎ | 124134/371472 [9:52:26<19:17:39, 3.56it/s] 33%|███▎ | 124135/371472 [9:52:26<18:55:05, 3.63it/s] 33%|███▎ | 124136/371472 [9:52:26<18:48:26, 3.65it/s] 33%|███▎ | 124137/371472 [9:52:27<18:56:23, 3.63it/s] 33%|███▎ | 124138/371472 [9:52:27<19:06:45, 3.59it/s] 33%|███▎ | 124139/371472 [9:52:27<19:32:05, 3.52it/s] 33%|███▎ | 124140/371472 [9:52:28<19:21:36, 3.55it/s] {'loss': 3.3926, 'learning_rate': 6.995571979572927e-07, 'epoch': 5.35} + 33%|███▎ | 124140/371472 [9:52:28<19:21:36, 3.55it/s] 33%|███▎ | 124141/371472 [9:52:28<18:55:02, 3.63it/s] 33%|███▎ | 124142/371472 [9:52:28<18:09:58, 3.78it/s] 33%|███▎ | 124143/371472 [9:52:28<18:55:16, 3.63it/s] 33%|███▎ | 124144/371472 [9:52:29<18:36:03, 3.69it/s] 33%|███▎ | 124145/371472 [9:52:29<18:59:54, 3.62it/s] 33%|███▎ | 124146/371472 [9:52:29<19:02:15, 3.61it/s] 33%|███▎ | 124147/371472 [9:52:30<18:50:29, 3.65it/s] 33%|███▎ | 124148/371472 [9:52:30<18:27:38, 3.72it/s] 33%|███▎ | 124149/371472 [9:52:30<18:23:47, 3.73it/s] 33%|███▎ | 124150/371472 [9:52:30<21:44:24, 3.16it/s] 33%|███▎ | 124151/371472 [9:52:31<19:52:39, 3.46it/s] 33%|███▎ | 124152/371472 [9:52:31<18:47:58, 3.65it/s] 33%|███▎ | 124153/371472 [9:52:31<18:59:20, 3.62it/s] 33%|███▎ | 124154/371472 [9:52:31<18:07:44, 3.79it/s] 33%|███▎ | 124155/371472 [9:52:32<17:52:45, 3.84it/s] 33%|███▎ | 124156/371472 [9:52:32<17:49:29, 3.85it/s] 33%|███▎ | 124157/371472 [9:52:32<18:21:14, 3.74it/s] 33%|███▎ | 124158/371472 [9:52:32<18:08:44, 3.79it/s] 33%|███▎ | 124159/371472 [9:52:33<18:32:01, 3.71it/s] 33%|███▎ | 124160/371472 [9:52:33<20:33:48, 3.34it/s] {'loss': 3.1273, 'learning_rate': 6.99508715981814e-07, 'epoch': 5.35} + 33%|███▎ | 124160/371472 [9:52:33<20:33:48, 3.34it/s] 33%|███▎ | 124161/371472 [9:52:33<19:57:31, 3.44it/s] 33%|███▎ | 124162/371472 [9:52:34<18:51:33, 3.64it/s] 33%|███▎ | 124163/371472 [9:52:34<18:00:58, 3.81it/s] 33%|███▎ | 124164/371472 [9:52:34<17:45:19, 3.87it/s] 33%|███▎ | 124165/371472 [9:52:34<17:37:03, 3.90it/s] 33%|███▎ | 124166/371472 [9:52:35<17:48:48, 3.86it/s] 33%|███▎ | 124167/371472 [9:52:35<18:32:05, 3.71it/s] 33%|███▎ | 124168/371472 [9:52:35<17:55:45, 3.83it/s] 33%|███▎ | 124169/371472 [9:52:35<17:32:20, 3.92it/s] 33%|███▎ | 124170/371472 [9:52:36<17:23:28, 3.95it/s] 33%|███▎ | 124171/371472 [9:52:36<17:17:59, 3.97it/s] 33%|███▎ | 124172/371472 [9:52:36<17:28:23, 3.93it/s] 33%|███▎ | 124173/371472 [9:52:36<18:41:40, 3.67it/s] 33%|███▎ | 124174/371472 [9:52:37<19:26:00, 3.53it/s] 33%|███▎ | 124175/371472 [9:52:37<19:04:56, 3.60it/s] 33%|███▎ | 124176/371472 [9:52:37<18:43:35, 3.67it/s] 33%|███▎ | 124177/371472 [9:52:38<18:47:18, 3.66it/s] 33%|███▎ | 124178/371472 [9:52:38<19:04:55, 3.60it/s] 33%|███▎ | 124179/371472 [9:52:38<18:53:12, 3.64it/s] 33%|███▎ | 124180/371472 [9:52:38<19:13:09, 3.57it/s] {'loss': 3.3624, 'learning_rate': 6.99460234006335e-07, 'epoch': 5.35} + 33%|███▎ | 124180/371472 [9:52:38<19:13:09, 3.57it/s] 33%|███▎ | 124181/371472 [9:52:39<18:37:49, 3.69it/s] 33%|███▎ | 124182/371472 [9:52:39<18:02:16, 3.81it/s] 33%|███▎ | 124183/371472 [9:52:39<19:11:47, 3.58it/s] 33%|███▎ | 124184/371472 [9:52:40<20:28:14, 3.36it/s] 33%|███▎ | 124185/371472 [9:52:40<20:25:03, 3.36it/s] 33%|███▎ | 124186/371472 [9:52:40<19:09:20, 3.59it/s] 33%|███▎ | 124187/371472 [9:52:40<18:59:00, 3.62it/s] 33%|███▎ | 124188/371472 [9:52:41<18:31:17, 3.71it/s] 33%|███▎ | 124189/371472 [9:52:41<18:14:36, 3.77it/s] 33%|███▎ | 124190/371472 [9:52:41<18:47:48, 3.65it/s] 33%|███▎ | 124191/371472 [9:52:41<19:05:04, 3.60it/s] 33%|███▎ | 124192/371472 [9:52:42<19:26:37, 3.53it/s] 33%|███▎ | 124193/371472 [9:52:42<18:53:25, 3.64it/s] 33%|███▎ | 124194/371472 [9:52:42<19:21:34, 3.55it/s] 33%|███▎ | 124195/371472 [9:52:43<19:19:22, 3.55it/s] 33%|███▎ | 124196/371472 [9:52:43<18:47:16, 3.66it/s] 33%|███▎ | 124197/371472 [9:52:43<18:17:55, 3.75it/s] 33%|███▎ | 124198/371472 [9:52:43<19:04:56, 3.60it/s] 33%|███▎ | 124199/371472 [9:52:44<19:52:56, 3.45it/s] 33%|███▎ | 124200/371472 [9:52:44<20:29:16, 3.35it/s] {'loss': 3.2057, 'learning_rate': 6.99411752030856e-07, 'epoch': 5.35} + 33%|███▎ | 124200/371472 [9:52:44<20:29:16, 3.35it/s] 33%|███▎ | 124201/371472 [9:52:44<20:39:16, 3.33it/s] 33%|███▎ | 124202/371472 [9:52:45<20:26:23, 3.36it/s] 33%|███▎ | 124203/371472 [9:52:45<19:44:23, 3.48it/s] 33%|███▎ | 124204/371472 [9:52:45<19:27:38, 3.53it/s] 33%|███▎ | 124205/371472 [9:52:45<18:56:09, 3.63it/s] 33%|███▎ | 124206/371472 [9:52:46<19:40:06, 3.49it/s] 33%|███▎ | 124207/371472 [9:52:46<19:34:55, 3.51it/s] 33%|███▎ | 124208/371472 [9:52:46<18:56:52, 3.62it/s] 33%|███▎ | 124209/371472 [9:52:47<17:59:29, 3.82it/s] 33%|███▎ | 124210/371472 [9:52:47<17:29:19, 3.93it/s] 33%|███▎ | 124211/371472 [9:52:47<17:34:08, 3.91it/s] 33%|███▎ | 124212/371472 [9:52:47<17:58:22, 3.82it/s] 33%|███▎ | 124213/371472 [9:52:48<18:15:44, 3.76it/s] 33%|███▎ | 124214/371472 [9:52:48<18:21:10, 3.74it/s] 33%|███▎ | 124215/371472 [9:52:48<19:42:50, 3.48it/s] 33%|███▎ | 124216/371472 [9:52:48<19:12:04, 3.58it/s] 33%|███▎ | 124217/371472 [9:52:49<18:24:36, 3.73it/s] 33%|███▎ | 124218/371472 [9:52:49<18:44:49, 3.66it/s] 33%|███▎ | 124219/371472 [9:52:49<19:26:27, 3.53it/s] 33%|███▎ | 124220/371472 [9:52:50<18:51:32, 3.64it/s] {'loss': 3.3404, 'learning_rate': 6.993632700553771e-07, 'epoch': 5.35} + 33%|███▎ | 124220/371472 [9:52:50<18:51:32, 3.64it/s] 33%|███▎ | 124221/371472 [9:52:50<18:47:44, 3.65it/s] 33%|███▎ | 124222/371472 [9:52:50<18:21:21, 3.74it/s] 33%|███▎ | 124223/371472 [9:52:50<19:13:07, 3.57it/s] 33%|███▎ | 124224/371472 [9:52:51<18:39:35, 3.68it/s] 33%|███▎ | 124225/371472 [9:52:51<18:27:30, 3.72it/s] 33%|███▎ | 124226/371472 [9:52:51<18:11:25, 3.78it/s] 33%|███▎ | 124227/371472 [9:52:51<17:59:04, 3.82it/s] 33%|███▎ | 124228/371472 [9:52:52<18:08:20, 3.79it/s] 33%|███▎ | 124229/371472 [9:52:52<17:51:23, 3.85it/s] 33%|███▎ | 124230/371472 [9:52:52<17:40:58, 3.88it/s] 33%|███▎ | 124231/371472 [9:52:52<19:09:10, 3.59it/s] 33%|███▎ | 124232/371472 [9:52:53<18:46:15, 3.66it/s] 33%|███▎ | 124233/371472 [9:52:53<19:26:54, 3.53it/s] 33%|███▎ | 124234/371472 [9:52:53<19:16:47, 3.56it/s] 33%|███▎ | 124235/371472 [9:52:54<19:08:24, 3.59it/s] 33%|███▎ | 124236/371472 [9:52:54<19:35:17, 3.51it/s] 33%|███▎ | 124237/371472 [9:52:54<18:59:49, 3.62it/s] 33%|███▎ | 124238/371472 [9:52:54<18:18:28, 3.75it/s] 33%|███▎ | 124239/371472 [9:52:55<18:23:25, 3.73it/s] 33%|███▎ | 124240/371472 [9:52:55<18:42:00, 3.67it/s] {'loss': 3.162, 'learning_rate': 6.993147880798983e-07, 'epoch': 5.35} + 33%|███▎ | 124240/371472 [9:52:55<18:42:00, 3.67it/s] 33%|███▎ | 124241/371472 [9:52:55<18:27:29, 3.72it/s] 33%|███▎ | 124242/371472 [9:52:56<19:25:58, 3.53it/s] 33%|███▎ | 124243/371472 [9:52:56<19:48:14, 3.47it/s] 33%|███▎ | 124244/371472 [9:52:56<19:10:06, 3.58it/s] 33%|███▎ | 124245/371472 [9:52:56<18:23:06, 3.74it/s] 33%|███▎ | 124246/371472 [9:52:57<19:16:36, 3.56it/s] 33%|███▎ | 124247/371472 [9:52:57<19:49:01, 3.47it/s] 33%|███▎ | 124248/371472 [9:52:57<19:07:25, 3.59it/s] 33%|███▎ | 124249/371472 [9:52:58<21:02:44, 3.26it/s] 33%|███▎ | 124250/371472 [9:52:58<19:50:32, 3.46it/s] 33%|███▎ | 124251/371472 [9:52:58<19:17:54, 3.56it/s] 33%|███▎ | 124252/371472 [9:52:58<19:12:09, 3.58it/s] 33%|███▎ | 124253/371472 [9:52:59<18:50:52, 3.64it/s] 33%|███▎ | 124254/371472 [9:52:59<18:39:08, 3.68it/s] 33%|███▎ | 124255/371472 [9:52:59<19:03:39, 3.60it/s] 33%|███▎ | 124256/371472 [9:52:59<18:30:28, 3.71it/s] 33%|███▎ | 124257/371472 [9:53:00<18:46:40, 3.66it/s] 33%|███▎ | 124258/371472 [9:53:00<18:44:32, 3.66it/s] 33%|███▎ | 124259/371472 [9:53:00<19:46:12, 3.47it/s] 33%|███▎ | 124260/371472 [9:53:01<18:52:58, 3.64it/s] {'loss': 3.011, 'learning_rate': 6.992663061044195e-07, 'epoch': 5.35} + 33%|███▎ | 124260/371472 [9:53:01<18:52:58, 3.64it/s] 33%|███▎ | 124261/371472 [9:53:01<18:22:59, 3.74it/s] 33%|███▎ | 124262/371472 [9:53:01<18:08:20, 3.79it/s] 33%|███▎ | 124263/371472 [9:53:01<18:16:01, 3.76it/s] 33%|███▎ | 124264/371472 [9:53:02<19:13:45, 3.57it/s] 33%|███▎ | 124265/371472 [9:53:02<18:59:22, 3.62it/s] 33%|███▎ | 124266/371472 [9:53:02<18:51:39, 3.64it/s] 33%|███▎ | 124267/371472 [9:53:03<21:38:09, 3.17it/s] 33%|███▎ | 124268/371472 [9:53:03<20:13:24, 3.40it/s] 33%|███▎ | 124269/371472 [9:53:03<19:59:58, 3.43it/s] 33%|███▎ | 124270/371472 [9:53:03<19:24:39, 3.54it/s] 33%|███▎ | 124271/371472 [9:53:04<18:40:32, 3.68it/s] 33%|███▎ | 124272/371472 [9:53:04<18:59:34, 3.62it/s] 33%|███▎ | 124273/371472 [9:53:04<18:55:57, 3.63it/s] 33%|███▎ | 124274/371472 [9:53:04<19:03:52, 3.60it/s] 33%|███▎ | 124275/371472 [9:53:05<19:33:27, 3.51it/s] 33%|███▎ | 124276/371472 [9:53:05<19:54:19, 3.45it/s] 33%|███▎ | 124277/371472 [9:53:05<18:42:37, 3.67it/s] 33%|███▎ | 124278/371472 [9:53:06<18:20:10, 3.74it/s] 33%|███▎ | 124279/371472 [9:53:06<18:24:47, 3.73it/s] 33%|███▎ | 124280/371472 [9:53:06<18:44:50, 3.66it/s] {'loss': 3.2391, 'learning_rate': 6.992178241289405e-07, 'epoch': 5.35} + 33%|███▎ | 124280/371472 [9:53:06<18:44:50, 3.66it/s] 33%|███▎ | 124281/371472 [9:53:06<18:48:24, 3.65it/s] 33%|███▎ | 124282/371472 [9:53:07<18:55:07, 3.63it/s] 33%|███▎ | 124283/371472 [9:53:07<18:51:27, 3.64it/s] 33%|███▎ | 124284/371472 [9:53:07<21:11:21, 3.24it/s] 33%|███▎ | 124285/371472 [9:53:08<19:52:11, 3.46it/s] 33%|███▎ | 124286/371472 [9:53:08<18:52:31, 3.64it/s] 33%|███▎ | 124287/371472 [9:53:08<18:55:17, 3.63it/s] 33%|███▎ | 124288/371472 [9:53:08<20:09:05, 3.41it/s] 33%|███▎ | 124289/371472 [9:53:09<19:15:12, 3.57it/s] 33%|███▎ | 124290/371472 [9:53:09<18:57:35, 3.62it/s] 33%|███▎ | 124291/371472 [9:53:09<18:44:39, 3.66it/s] 33%|███▎ | 124292/371472 [9:53:10<18:45:10, 3.66it/s] 33%|███▎ | 124293/371472 [9:53:10<19:04:18, 3.60it/s] 33%|███▎ | 124294/371472 [9:53:10<18:46:44, 3.66it/s] 33%|███▎ | 124295/371472 [9:53:10<18:24:29, 3.73it/s] 33%|███▎ | 124296/371472 [9:53:11<18:22:53, 3.74it/s] 33%|███▎ | 124297/371472 [9:53:11<18:34:45, 3.70it/s] 33%|███▎ | 124298/371472 [9:53:11<18:13:14, 3.77it/s] 33%|███▎ | 124299/371472 [9:53:11<17:51:53, 3.84it/s] 33%|███▎ | 124300/371472 [9:53:12<18:54:19, 3.63it/s] {'loss': 3.3316, 'learning_rate': 6.991693421534616e-07, 'epoch': 5.35} + 33%|███▎ | 124300/371472 [9:53:12<18:54:19, 3.63it/s] 33%|███▎ | 124301/371472 [9:53:12<19:02:06, 3.61it/s] 33%|███▎ | 124302/371472 [9:53:12<18:55:31, 3.63it/s] 33%|███▎ | 124303/371472 [9:53:12<18:36:55, 3.69it/s] 33%|███▎ | 124304/371472 [9:53:13<19:27:05, 3.53it/s] 33%|███▎ | 124305/371472 [9:53:13<19:52:07, 3.46it/s] 33%|███▎ | 124306/371472 [9:53:13<19:06:57, 3.59it/s] 33%|███▎ | 124307/371472 [9:53:14<18:30:49, 3.71it/s] 33%|███▎ | 124308/371472 [9:53:14<20:42:57, 3.31it/s] 33%|███▎ | 124309/371472 [9:53:14<19:44:30, 3.48it/s] 33%|███▎ | 124310/371472 [9:53:15<20:39:46, 3.32it/s] 33%|███▎ | 124311/371472 [9:53:15<20:02:39, 3.43it/s] 33%|███▎ | 124312/371472 [9:53:15<19:29:40, 3.52it/s] 33%|███▎ | 124313/371472 [9:53:15<21:02:51, 3.26it/s] 33%|███▎ | 124314/371472 [9:53:16<20:34:52, 3.34it/s] 33%|███▎ | 124315/371472 [9:53:16<20:46:25, 3.30it/s] 33%|███▎ | 124316/371472 [9:53:16<21:20:44, 3.22it/s] 33%|███▎ | 124317/371472 [9:53:17<23:30:37, 2.92it/s] 33%|███▎ | 124318/371472 [9:53:17<22:18:33, 3.08it/s] 33%|███▎ | 124319/371472 [9:53:17<22:17:07, 3.08it/s] 33%|███▎ | 124320/371472 [9:53:18<21:00:10, 3.27it/s] {'loss': 3.2561, 'learning_rate': 6.991208601779827e-07, 'epoch': 5.35} + 33%|███▎ | 124320/371472 [9:53:18<21:00:10, 3.27it/s] 33%|███▎ | 124321/371472 [9:53:18<22:13:17, 3.09it/s] 33%|███▎ | 124322/371472 [9:53:18<21:16:12, 3.23it/s] 33%|███▎ | 124323/371472 [9:53:19<20:28:19, 3.35it/s] 33%|███▎ | 124324/371472 [9:53:19<20:54:01, 3.28it/s] 33%|███▎ | 124325/371472 [9:53:19<19:58:58, 3.44it/s] 33%|███▎ | 124326/371472 [9:53:19<19:42:26, 3.48it/s] 33%|███▎ | 124327/371472 [9:53:20<20:23:21, 3.37it/s] 33%|███▎ | 124328/371472 [9:53:20<20:45:42, 3.31it/s] 33%|███▎ | 124329/371472 [9:53:20<20:23:20, 3.37it/s] 33%|███▎ | 124330/371472 [9:53:21<19:35:31, 3.50it/s] 33%|███▎ | 124331/371472 [9:53:21<19:04:27, 3.60it/s] 33%|███▎ | 124332/371472 [9:53:21<19:38:49, 3.49it/s] 33%|███▎ | 124333/371472 [9:53:21<20:11:53, 3.40it/s] 33%|███▎ | 124334/371472 [9:53:22<20:31:44, 3.34it/s] 33%|███▎ | 124335/371472 [9:53:22<19:43:39, 3.48it/s] 33%|███▎ | 124336/371472 [9:53:22<19:14:49, 3.57it/s] 33%|███▎ | 124337/371472 [9:53:23<18:42:50, 3.67it/s] 33%|███▎ | 124338/371472 [9:53:23<18:53:13, 3.63it/s] 33%|███▎ | 124339/371472 [9:53:23<18:10:02, 3.78it/s] 33%|███▎ | 124340/371472 [9:53:23<18:39:42, 3.68it/s] {'loss': 3.2998, 'learning_rate': 6.990723782025038e-07, 'epoch': 5.36} + 33%|███▎ | 124340/371472 [9:53:23<18:39:42, 3.68it/s] 33%|███▎ | 124341/371472 [9:53:24<18:45:42, 3.66it/s] 33%|███▎ | 124342/371472 [9:53:24<19:31:34, 3.52it/s] 33%|███▎ | 124343/371472 [9:53:24<21:29:02, 3.20it/s] 33%|███▎ | 124344/371472 [9:53:25<20:20:01, 3.38it/s] 33%|███▎ | 124345/371472 [9:53:25<20:05:52, 3.42it/s] 33%|███▎ | 124346/371472 [9:53:25<19:12:21, 3.57it/s] 33%|███▎ | 124347/371472 [9:53:25<19:11:31, 3.58it/s] 33%|███▎ | 124348/371472 [9:53:26<18:40:04, 3.68it/s] 33%|███▎ | 124349/371472 [9:53:26<19:19:33, 3.55it/s] 33%|███▎ | 124350/371472 [9:53:26<18:44:51, 3.66it/s] 33%|███▎ | 124351/371472 [9:53:27<19:09:46, 3.58it/s] 33%|███▎ | 124352/371472 [9:53:27<19:38:54, 3.49it/s] 33%|███▎ | 124353/371472 [9:53:27<20:19:13, 3.38it/s] 33%|███▎ | 124354/371472 [9:53:27<19:10:32, 3.58it/s] 33%|███▎ | 124355/371472 [9:53:28<18:47:21, 3.65it/s] 33%|██���▎ | 124356/371472 [9:53:28<19:43:14, 3.48it/s] 33%|███▎ | 124357/371472 [9:53:28<19:41:16, 3.49it/s] 33%|███▎ | 124358/371472 [9:53:29<18:46:21, 3.66it/s] 33%|███▎ | 124359/371472 [9:53:29<19:07:28, 3.59it/s] 33%|███▎ | 124360/371472 [9:53:29<18:49:02, 3.65it/s] {'loss': 3.1571, 'learning_rate': 6.990238962270249e-07, 'epoch': 5.36} + 33%|███▎ | 124360/371472 [9:53:29<18:49:02, 3.65it/s] 33%|███▎ | 124361/371472 [9:53:29<18:27:05, 3.72it/s] 33%|███▎ | 124362/371472 [9:53:30<20:29:46, 3.35it/s] 33%|███▎ | 124363/371472 [9:53:30<20:25:38, 3.36it/s] 33%|███▎ | 124364/371472 [9:53:30<20:15:47, 3.39it/s] 33%|███▎ | 124365/371472 [9:53:31<19:19:23, 3.55it/s] 33%|███▎ | 124366/371472 [9:53:31<19:07:30, 3.59it/s] 33%|███▎ | 124367/371472 [9:53:31<18:53:43, 3.63it/s] 33%|███▎ | 124368/371472 [9:53:31<19:30:59, 3.52it/s] 33%|███▎ | 124369/371472 [9:53:32<20:32:31, 3.34it/s] 33%|███▎ | 124370/371472 [9:53:32<21:36:32, 3.18it/s] 33%|███▎ | 124371/371472 [9:53:32<21:49:10, 3.15it/s] 33%|███▎ | 124372/371472 [9:53:33<20:39:07, 3.32it/s] 33%|███▎ | 124373/371472 [9:53:33<19:43:19, 3.48it/s] 33%|███▎ | 124374/371472 [9:53:33<19:27:26, 3.53it/s] 33%|███▎ | 124375/371472 [9:53:33<18:42:38, 3.67it/s] 33%|███▎ | 124376/371472 [9:53:34<18:57:17, 3.62it/s] 33%|███▎ | 124377/371472 [9:53:34<18:19:47, 3.74it/s] 33%|███▎ | 124378/371472 [9:53:34<18:15:49, 3.76it/s] 33%|███▎ | 124379/371472 [9:53:34<17:49:23, 3.85it/s] 33%|███▎ | 124380/371472 [9:53:35<22:17:37, 3.08it/s] {'loss': 3.1613, 'learning_rate': 6.98975414251546e-07, 'epoch': 5.36} + 33%|███▎ | 124380/371472 [9:53:35<22:17:37, 3.08it/s] 33%|███▎ | 124381/371472 [9:53:35<22:40:48, 3.03it/s] 33%|███▎ | 124382/371472 [9:53:36<23:10:44, 2.96it/s] 33%|███▎ | 124383/371472 [9:53:36<23:16:34, 2.95it/s] 33%|███▎ | 124384/371472 [9:53:36<22:24:11, 3.06it/s] 33%|███▎ | 124385/371472 [9:53:37<21:41:31, 3.16it/s] 33%|███▎ | 124386/371472 [9:53:37<20:24:44, 3.36it/s] 33%|███▎ | 124387/371472 [9:53:37<20:15:32, 3.39it/s] 33%|███▎ | 124388/371472 [9:53:37<19:46:58, 3.47it/s] 33%|███▎ | 124389/371472 [9:53:38<18:53:32, 3.63it/s] 33%|███▎ | 124390/371472 [9:53:38<18:10:40, 3.78it/s] 33%|███▎ | 124391/371472 [9:53:38<18:00:44, 3.81it/s] 33%|███▎ | 124392/371472 [9:53:38<18:55:42, 3.63it/s] 33%|███▎ | 124393/371472 [9:53:39<19:17:27, 3.56it/s] 33%|███▎ | 124394/371472 [9:53:39<19:03:42, 3.60it/s] 33%|███▎ | 124395/371472 [9:53:39<18:58:40, 3.62it/s] 33%|███▎ | 124396/371472 [9:53:40<19:52:59, 3.45it/s] 33%|███▎ | 124397/371472 [9:53:40<19:48:19, 3.47it/s] 33%|███▎ | 124398/371472 [9:53:40<19:50:39, 3.46it/s] 33%|███▎ | 124399/371472 [9:53:40<18:54:43, 3.63it/s] 33%|███▎ | 124400/371472 [9:53:41<19:18:29, 3.55it/s] {'loss': 3.2361, 'learning_rate': 6.989269322760672e-07, 'epoch': 5.36} + 33%|███▎ | 124400/371472 [9:53:41<19:18:29, 3.55it/s] 33%|███▎ | 124401/371472 [9:53:41<18:56:31, 3.62it/s] 33%|███▎ | 124402/371472 [9:53:41<18:24:22, 3.73it/s] 33%|███▎ | 124403/371472 [9:53:42<20:26:01, 3.36it/s] 33%|███▎ | 124404/371472 [9:53:42<19:25:55, 3.53it/s] 33%|███▎ | 124405/371472 [9:53:42<19:27:15, 3.53it/s] 33%|███▎ | 124406/371472 [9:53:42<19:27:54, 3.53it/s] 33%|███▎ | 124407/371472 [9:53:43<18:33:24, 3.70it/s] 33%|███▎ | 124408/371472 [9:53:43<18:21:39, 3.74it/s] 33%|███▎ | 124409/371472 [9:53:43<17:51:49, 3.84it/s] 33%|███▎ | 124410/371472 [9:53:43<18:04:28, 3.80it/s] 33%|███▎ | 124411/371472 [9:53:44<19:02:48, 3.60it/s] 33%|███▎ | 124412/371472 [9:53:44<20:02:19, 3.42it/s] 33%|███▎ | 124413/371472 [9:53:44<19:32:08, 3.51it/s] 33%|███▎ | 124414/371472 [9:53:45<18:48:38, 3.65it/s] 33%|███▎ | 124415/371472 [9:53:45<19:38:05, 3.50it/s] 33%|███▎ | 124416/371472 [9:53:45<19:09:43, 3.58it/s] 33%|███▎ | 124417/371472 [9:53:45<19:01:46, 3.61it/s] 33%|███▎ | 124418/371472 [9:53:46<19:31:04, 3.52it/s] 33%|███▎ | 124419/371472 [9:53:46<19:26:33, 3.53it/s] 33%|███▎ | 124420/371472 [9:53:46<18:36:43, 3.69it/s] {'loss': 3.255, 'learning_rate': 6.988784503005882e-07, 'epoch': 5.36} + 33%|███▎ | 124420/371472 [9:53:46<18:36:43, 3.69it/s] 33%|███▎ | 124421/371472 [9:53:47<19:13:42, 3.57it/s] 33%|███▎ | 124422/371472 [9:53:47<18:42:19, 3.67it/s] 33%|███▎ | 124423/371472 [9:53:47<18:59:04, 3.61it/s] 33%|███▎ | 124424/371472 [9:53:47<18:51:48, 3.64it/s] 33%|███▎ | 124425/371472 [9:53:48<19:15:59, 3.56it/s] 33%|███▎ | 124426/371472 [9:53:48<18:26:39, 3.72it/s] 33%|███▎ | 124427/371472 [9:53:48<19:10:32, 3.58it/s] 33%|███▎ | 124428/371472 [9:53:48<19:16:58, 3.56it/s] 33%|███▎ | 124429/371472 [9:53:49<19:12:31, 3.57it/s] 33%|███▎ | 124430/371472 [9:53:49<18:36:07, 3.69it/s] 33%|███▎ | 124431/371472 [9:53:49<18:22:20, 3.74it/s] 33%|███▎ | 124432/371472 [9:53:50<18:17:18, 3.75it/s] 33%|███▎ | 124433/371472 [9:53:50<18:56:38, 3.62it/s] 33%|███▎ | 124434/371472 [9:53:50<19:01:39, 3.61it/s] 33%|███▎ | 124435/371472 [9:53:50<19:05:40, 3.59it/s] 33%|███▎ | 124436/371472 [9:53:51<19:52:48, 3.45it/s] 33%|███▎ | 124437/371472 [9:53:51<19:24:04, 3.54it/s] 33%|███▎ | 124438/371472 [9:53:51<19:29:45, 3.52it/s] 33%|███▎ | 124439/371472 [9:53:52<19:51:01, 3.46it/s] 33%|███▎ | 124440/371472 [9:53:52<20:36:35, 3.33it/s] {'loss': 3.3, 'learning_rate': 6.988299683251093e-07, 'epoch': 5.36} + 33%|███▎ | 124440/371472 [9:53:52<20:36:35, 3.33it/s] 33%|███▎ | 124441/371472 [9:53:52<19:40:09, 3.49it/s] 33%|███▎ | 124442/371472 [9:53:52<19:06:37, 3.59it/s] 33%|███▎ | 124443/371472 [9:53:53<19:27:02, 3.53it/s] 34%|███▎ | 124444/371472 [9:53:53<19:26:02, 3.53it/s] 34%|███▎ | 124445/371472 [9:53:53<18:53:14, 3.63it/s] 34%|███▎ | 124446/371472 [9:53:53<18:36:41, 3.69it/s] 34%|███▎ | 124447/371472 [9:53:54<18:08:46, 3.78it/s] 34%|███▎ | 124448/371472 [9:53:54<17:57:24, 3.82it/s] 34%|███▎ | 124449/371472 [9:53:54<18:01:30, 3.81it/s] 34%|███▎ | 124450/371472 [9:53:55<18:04:46, 3.80it/s] 34%|███▎ | 124451/371472 [9:53:55<18:13:37, 3.76it/s] 34%|███▎ | 124452/371472 [9:53:55<17:30:21, 3.92it/s] 34%|███▎ | 124453/371472 [9:53:55<19:41:42, 3.48it/s] 34%|███▎ | 124454/371472 [9:53:56<20:07:40, 3.41it/s] 34%|███▎ | 124455/371472 [9:53:56<19:27:27, 3.53it/s] 34%|███▎ | 124456/371472 [9:53:56<18:28:14, 3.71it/s] 34%|███▎ | 124457/371472 [9:53:56<17:38:35, 3.89it/s] 34%|███▎ | 124458/371472 [9:53:57<17:24:14, 3.94it/s] 34%|███▎ | 124459/371472 [9:53:57<17:37:20, 3.89it/s] 34%|███▎ | 124460/371472 [9:53:57<17:23:10, 3.95it/s] {'loss': 3.3961, 'learning_rate': 6.987814863496304e-07, 'epoch': 5.36} + 34%|███▎ | 124460/371472 [9:53:57<17:23:10, 3.95it/s] 34%|███▎ | 124461/371472 [9:53:57<17:31:27, 3.92it/s] 34%|███▎ | 124462/371472 [9:53:58<17:23:29, 3.95it/s] 34%|███▎ | 124463/371472 [9:53:58<19:10:47, 3.58it/s] 34%|███▎ | 124464/371472 [9:53:58<19:21:50, 3.54it/s] 34%|███▎ | 124465/371472 [9:53:59<18:54:35, 3.63it/s] 34%|███▎ | 124466/371472 [9:53:59<19:52:25, 3.45it/s] 34%|███▎ | 124467/371472 [9:53:59<19:20:01, 3.55it/s] 34%|███▎ | 124468/371472 [9:53:59<19:04:46, 3.60it/s] 34%|███▎ | 124469/371472 [9:54:00<19:04:56, 3.60it/s] 34%|███▎ | 124470/371472 [9:54:00<19:33:14, 3.51it/s] 34%|███▎ | 124471/371472 [9:54:00<19:54:26, 3.45it/s] 34%|███▎ | 124472/371472 [9:54:01<20:01:38, 3.43it/s] 34%|███▎ | 124473/371472 [9:54:01<19:19:31, 3.55it/s] 34%|███▎ | 124474/371472 [9:54:01<19:31:29, 3.51it/s] 34%|███▎ | 124475/371472 [9:54:01<19:16:07, 3.56it/s] 34%|███▎ | 124476/371472 [9:54:02<19:17:10, 3.56it/s] 34%|███▎ | 124477/371472 [9:54:02<18:53:44, 3.63it/s] 34%|███▎ | 124478/371472 [9:54:02<19:27:48, 3.53it/s] 34%|███▎ | 124479/371472 [9:54:03<18:53:05, 3.63it/s] 34%|███▎ | 124480/371472 [9:54:03<18:05:54, 3.79it/s] {'loss': 3.2765, 'learning_rate': 6.987330043741516e-07, 'epoch': 5.36} + 34%|███▎ | 124480/371472 [9:54:03<18:05:54, 3.79it/s] 34%|███▎ | 124481/371472 [9:54:03<18:13:33, 3.76it/s] 34%|███▎ | 124482/371472 [9:54:03<18:48:08, 3.65it/s] 34%|███▎ | 124483/371472 [9:54:04<18:36:42, 3.69it/s] 34%|███▎ | 124484/371472 [9:54:04<18:49:00, 3.65it/s] 34%|███▎ | 124485/371472 [9:54:04<17:58:19, 3.82it/s] 34%|███▎ | 124486/371472 [9:54:04<18:33:10, 3.70it/s] 34%|███▎ | 124487/371472 [9:54:05<18:12:31, 3.77it/s] 34%|███▎ | 124488/371472 [9:54:05<18:10:03, 3.78it/s] 34%|███▎ | 124489/371472 [9:54:05<17:48:55, 3.85it/s] 34%|███▎ | 124490/371472 [9:54:05<17:45:53, 3.86it/s] 34%|███▎ | 124491/371472 [9:54:06<19:19:49, 3.55it/s] 34%|███▎ | 124492/371472 [9:54:06<20:41:22, 3.32it/s] 34%|███▎ | 124493/371472 [9:54:06<20:50:26, 3.29it/s] 34%|███▎ | 124494/371472 [9:54:07<20:15:28, 3.39it/s] 34%|███▎ | 124495/371472 [9:54:07<19:30:38, 3.52it/s] 34%|███▎ | 124496/371472 [9:54:07<20:51:05, 3.29it/s] 34%|███▎ | 124497/371472 [9:54:08<19:56:23, 3.44it/s] 34%|███▎ | 124498/371472 [9:54:08<18:42:15, 3.67it/s] 34%|███▎ | 124499/371472 [9:54:08<18:55:44, 3.62it/s] 34%|███▎ | 124500/371472 [9:54:08<18:45:15, 3.66it/s] {'loss': 3.2107, 'learning_rate': 6.986845223986726e-07, 'epoch': 5.36} + 34%|███▎ | 124500/371472 [9:54:08<18:45:15, 3.66it/s] 34%|███▎ | 124501/371472 [9:54:09<19:16:27, 3.56it/s] 34%|███▎ | 124502/371472 [9:54:09<20:09:20, 3.40it/s] 34%|███▎ | 124503/371472 [9:54:09<20:13:20, 3.39it/s] 34%|███▎ | 124504/371472 [9:54:10<21:03:14, 3.26it/s] 34%|███▎ | 124505/371472 [9:54:10<20:49:31, 3.29it/s] 34%|███▎ | 124506/371472 [9:54:10<20:26:48, 3.36it/s] 34%|███▎ | 124507/371472 [9:54:10<19:59:09, 3.43it/s] 34%|███▎ | 124508/371472 [9:54:11<20:54:49, 3.28it/s] 34%|███▎ | 124509/371472 [9:54:11<19:41:39, 3.48it/s] 34%|███▎ | 124510/371472 [9:54:11<19:56:28, 3.44it/s] 34%|███▎ | 124511/371472 [9:54:12<19:04:31, 3.60it/s] 34%|███▎ | 124512/371472 [9:54:12<18:13:50, 3.76it/s] 34%|███▎ | 124513/371472 [9:54:12<19:06:21, 3.59it/s] 34%|███▎ | 124514/371472 [9:54:12<18:15:01, 3.76it/s] 34%|███▎ | 124515/371472 [9:54:13<18:48:22, 3.65it/s] 34%|███▎ | 124516/371472 [9:54:13<18:47:22, 3.65it/s] 34%|███▎ | 124517/371472 [9:54:13<22:52:56, 3.00it/s] 34%|███▎ | 124518/371472 [9:54:14<22:06:34, 3.10it/s] 34%|███▎ | 124519/371472 [9:54:14<23:36:33, 2.91it/s] 34%|███▎ | 124520/371472 [9:54:14<22:42:41, 3.02it/s] {'loss': 3.0093, 'learning_rate': 6.986360404231937e-07, 'epoch': 5.36} + 34%|███▎ | 124520/371472 [9:54:14<22:42:41, 3.02it/s] 34%|███▎ | 124521/371472 [9:54:15<21:21:53, 3.21it/s] 34%|███▎ | 124522/371472 [9:54:15<21:27:09, 3.20it/s] 34%|███▎ | 124523/371472 [9:54:15<22:53:30, 3.00it/s] 34%|███▎ | 124524/371472 [9:54:16<20:52:23, 3.29it/s] 34%|███▎ | 124525/371472 [9:54:16<20:01:01, 3.43it/s] 34%|███▎ | 124526/371472 [9:54:16<19:51:08, 3.46it/s] 34%|███▎ | 124527/371472 [9:54:16<19:24:49, 3.53it/s] 34%|███▎ | 124528/371472 [9:54:17<20:07:11, 3.41it/s] 34%|███▎ | 124529/371472 [9:54:17<19:18:29, 3.55it/s] 34%|███▎ | 124530/371472 [9:54:17<18:44:10, 3.66it/s] 34%|███▎ | 124531/371472 [9:54:18<18:43:56, 3.66it/s] 34%|███▎ | 124532/371472 [9:54:18<19:38:17, 3.49it/s] 34%|███▎ | 124533/371472 [9:54:18<20:22:29, 3.37it/s] 34%|███▎ | 124534/371472 [9:54:18<20:18:00, 3.38it/s] 34%|███▎ | 124535/371472 [9:54:19<19:28:19, 3.52it/s] 34%|███▎ | 124536/371472 [9:54:19<19:25:14, 3.53it/s] 34%|███▎ | 124537/371472 [9:54:19<19:40:40, 3.49it/s] 34%|███▎ | 124538/371472 [9:54:20<20:04:01, 3.42it/s] 34%|███▎ | 124539/371472 [9:54:20<20:51:26, 3.29it/s] 34%|███▎ | 124540/371472 [9:54:20<20:01:50, 3.42it/s] {'loss': 3.1355, 'learning_rate': 6.985875584477149e-07, 'epoch': 5.36} + 34%|███▎ | 124540/371472 [9:54:20<20:01:50, 3.42it/s] 34%|███▎ | 124541/371472 [9:54:20<19:31:28, 3.51it/s] 34%|███▎ | 124542/371472 [9:54:21<18:30:38, 3.71it/s] 34%|███▎ | 124543/371472 [9:54:21<19:48:07, 3.46it/s] 34%|███▎ | 124544/371472 [9:54:21<19:33:01, 3.51it/s] 34%|███▎ | 124545/371472 [9:54:22<20:33:49, 3.34it/s] 34%|███▎ | 124546/371472 [9:54:22<19:39:57, 3.49it/s] 34%|███▎ | 124547/371472 [9:54:22<18:28:41, 3.71it/s] 34%|███▎ | 124548/371472 [9:54:22<19:20:53, 3.55it/s] 34%|███▎ | 124549/371472 [9:54:23<18:41:42, 3.67it/s] 34%|███▎ | 124550/371472 [9:54:23<18:46:57, 3.65it/s] 34%|███▎ | 124551/371472 [9:54:23<20:08:03, 3.41it/s] 34%|███▎ | 124552/371472 [9:54:24<19:05:43, 3.59it/s] 34%|███▎ | 124553/371472 [9:54:24<20:46:52, 3.30it/s] 34%|███▎ | 124554/371472 [9:54:24<20:06:17, 3.41it/s] 34%|███▎ | 124555/371472 [9:54:24<19:45:22, 3.47it/s] 34%|███▎ | 124556/371472 [9:54:25<19:16:55, 3.56it/s] 34%|███▎ | 124557/371472 [9:54:25<18:34:51, 3.69it/s] 34%|███▎ | 124558/371472 [9:54:25<18:42:16, 3.67it/s] 34%|███▎ | 124559/371472 [9:54:25<18:19:20, 3.74it/s] 34%|███▎ | 124560/371472 [9:54:26<18:08:41, 3.78it/s] {'loss': 2.961, 'learning_rate': 6.98539076472236e-07, 'epoch': 5.37} + 34%|███▎ | 124560/371472 [9:54:26<18:08:41, 3.78it/s] 34%|███▎ | 124561/371472 [9:54:26<17:53:22, 3.83it/s] 34%|███▎ | 124562/371472 [9:54:26<17:59:36, 3.81it/s] 34%|███▎ | 124563/371472 [9:54:27<18:28:54, 3.71it/s] 34%|███▎ | 124564/371472 [9:54:27<18:10:24, 3.77it/s] 34%|███▎ | 124565/371472 [9:54:27<17:53:47, 3.83it/s] 34%|███▎ | 124566/371472 [9:54:27<19:32:53, 3.51it/s] 34%|███▎ | 124567/371472 [9:54:28<19:21:11, 3.54it/s] 34%|███▎ | 124568/371472 [9:54:28<19:32:34, 3.51it/s] 34%|███▎ | 124569/371472 [9:54:28<19:18:04, 3.55it/s] 34%|███▎ | 124570/371472 [9:54:29<19:28:04, 3.52it/s] 34%|███▎ | 124571/371472 [9:54:29<19:25:23, 3.53it/s] 34%|███▎ | 124572/371472 [9:54:29<19:22:06, 3.54it/s] 34%|███▎ | 124573/371472 [9:54:29<19:16:58, 3.56it/s] 34%|███▎ | 124574/371472 [9:54:30<19:03:24, 3.60it/s] 34%|███▎ | 124575/371472 [9:54:30<20:43:21, 3.31it/s] 34%|███▎ | 124576/371472 [9:54:30<22:28:32, 3.05it/s] 34%|███▎ | 124577/371472 [9:54:31<23:08:54, 2.96it/s] 34%|███▎ | 124578/371472 [9:54:31<23:10:58, 2.96it/s] 34%|███▎ | 124579/371472 [9:54:31<21:39:15, 3.17it/s] 34%|███▎ | 124580/371472 [9:54:32<20:47:12, 3.30it/s] {'loss': 3.3422, 'learning_rate': 6.98490594496757e-07, 'epoch': 5.37} + 34%|███▎ | 124580/371472 [9:54:32<20:47:12, 3.30it/s] 34%|███▎ | 124581/371472 [9:54:32<20:05:53, 3.41it/s] 34%|███▎ | 124582/371472 [9:54:32<19:04:11, 3.60it/s] 34%|███▎ | 124583/371472 [9:54:32<18:40:42, 3.67it/s] 34%|███▎ | 124584/371472 [9:54:33<18:56:56, 3.62it/s] 34%|███▎ | 124585/371472 [9:54:33<19:58:30, 3.43it/s] 34%|███▎ | 124586/371472 [9:54:33<19:26:43, 3.53it/s] 34%|███▎ | 124587/371472 [9:54:34<18:54:38, 3.63it/s] 34%|███▎ | 124588/371472 [9:54:34<18:53:27, 3.63it/s] 34%|███▎ | 124589/371472 [9:54:34<19:07:41, 3.59it/s] 34%|███▎ | 124590/371472 [9:54:34<18:53:24, 3.63it/s] 34%|███▎ | 124591/371472 [9:54:35<18:58:03, 3.62it/s] 34%|███▎ | 124592/371472 [9:54:35<21:06:25, 3.25it/s] 34%|███▎ | 124593/371472 [9:54:35<20:19:10, 3.37it/s] 34%|███▎ | 124594/371472 [9:54:36<20:35:08, 3.33it/s] 34%|███▎ | 124595/371472 [9:54:36<19:57:36, 3.44it/s] 34%|███▎ | 124596/371472 [9:54:36<19:01:50, 3.60it/s] 34%|███▎ | 124597/371472 [9:54:36<18:31:22, 3.70it/s] 34%|███▎ | 124598/371472 [9:54:37<19:19:50, 3.55it/s] 34%|███▎ | 124599/371472 [9:54:37<18:47:46, 3.65it/s] 34%|███▎ | 124600/371472 [9:54:37<18:09:05, 3.78it/s] {'loss': 3.3043, 'learning_rate': 6.984421125212781e-07, 'epoch': 5.37} + 34%|███▎ | 124600/371472 [9:54:37<18:09:05, 3.78it/s] 34%|███▎ | 124601/371472 [9:54:37<18:28:33, 3.71it/s] 34%|███▎ | 124602/371472 [9:54:38<17:59:03, 3.81it/s] 34%|███▎ | 124603/371472 [9:54:38<18:31:12, 3.70it/s] 34%|███▎ | 124604/371472 [9:54:38<19:06:45, 3.59it/s] 34%|███▎ | 124605/371472 [9:54:39<18:43:32, 3.66it/s] 34%|███▎ | 124606/371472 [9:54:39<18:52:18, 3.63it/s] 34%|███▎ | 124607/371472 [9:54:39<18:35:49, 3.69it/s] 34%|███▎ | 124608/371472 [9:54:39<18:19:45, 3.74it/s] 34%|███▎ | 124609/371472 [9:54:40<20:23:29, 3.36it/s] 34%|███▎ | 124610/371472 [9:54:40<20:21:40, 3.37it/s] 34%|███▎ | 124611/371472 [9:54:40<20:09:42, 3.40it/s] 34%|███▎ | 124612/371472 [9:54:41<19:07:39, 3.58it/s] 34%|███▎ | 124613/371472 [9:54:41<19:46:26, 3.47it/s] 34%|███▎ | 124614/371472 [9:54:41<19:01:36, 3.60it/s] 34%|███▎ | 124615/371472 [9:54:41<20:02:12, 3.42it/s] 34%|███▎ | 124616/371472 [9:54:42<19:42:15, 3.48it/s] 34%|███▎ | 124617/371472 [9:54:42<19:12:32, 3.57it/s] 34%|███▎ | 124618/371472 [9:54:42<19:13:45, 3.57it/s] 34%|███▎ | 124619/371472 [9:54:43<19:00:29, 3.61it/s] 34%|███▎ | 124620/371472 [9:54:43<18:38:51, 3.68it/s] {'loss': 3.0706, 'learning_rate': 6.983936305457993e-07, 'epoch': 5.37} + 34%|███▎ | 124620/371472 [9:54:43<18:38:51, 3.68it/s] 34%|███▎ | 124621/371472 [9:54:43<18:00:10, 3.81it/s] 34%|███▎ | 124622/371472 [9:54:43<18:57:09, 3.62it/s] 34%|███▎ | 124623/371472 [9:54:44<18:26:41, 3.72it/s] 34%|███▎ | 124624/371472 [9:54:44<19:03:22, 3.60it/s] 34%|███▎ | 124625/371472 [9:54:44<18:24:10, 3.73it/s] 34%|███▎ | 124626/371472 [9:54:44<18:00:38, 3.81it/s] 34%|███▎ | 124627/371472 [9:54:45<17:35:11, 3.90it/s] 34%|███▎ | 124628/371472 [9:54:45<17:14:57, 3.98it/s] 34%|███▎ | 124629/371472 [9:54:45<16:59:50, 4.03it/s] 34%|███▎ | 124630/371472 [9:54:45<17:03:09, 4.02it/s] 34%|███▎ | 124631/371472 [9:54:46<17:19:36, 3.96it/s] 34%|███▎ | 124632/371472 [9:54:46<17:03:21, 4.02it/s] 34%|███▎ | 124633/371472 [9:54:46<17:22:51, 3.94it/s] 34%|███▎ | 124634/371472 [9:54:46<17:45:36, 3.86it/s] 34%|███▎ | 124635/371472 [9:54:47<17:41:49, 3.87it/s] 34%|███▎ | 124636/371472 [9:54:47<17:43:26, 3.87it/s] 34%|███▎ | 124637/371472 [9:54:47<21:02:49, 3.26it/s] 34%|███▎ | 124638/371472 [9:54:48<20:11:44, 3.40it/s] 34%|███▎ | 124639/371472 [9:54:48<19:12:44, 3.57it/s] 34%|███▎ | 124640/371472 [9:54:48<19:22:23, 3.54it/s] {'loss': 3.1049, 'learning_rate': 6.983451485703204e-07, 'epoch': 5.37} + 34%|███▎ | 124640/371472 [9:54:48<19:22:23, 3.54it/s] 34%|███▎ | 124641/371472 [9:54:48<19:55:44, 3.44it/s] 34%|███▎ | 124642/371472 [9:54:49<19:11:44, 3.57it/s] 34%|███▎ | 124643/371472 [9:54:49<19:20:44, 3.54it/s] 34%|███▎ | 124644/371472 [9:54:49<18:40:29, 3.67it/s] 34%|███▎ | 124645/371472 [9:54:49<18:42:46, 3.66it/s] 34%|███▎ | 124646/371472 [9:54:50<18:21:21, 3.74it/s] 34%|███▎ | 124647/371472 [9:54:50<17:47:14, 3.85it/s] 34%|███▎ | 124648/371472 [9:54:50<18:16:55, 3.75it/s] 34%|███▎ | 124649/371472 [9:54:51<18:29:59, 3.71it/s] 34%|███▎ | 124650/371472 [9:54:51<18:58:18, 3.61it/s] 34%|███▎ | 124651/371472 [9:54:51<19:01:53, 3.60it/s] 34%|███▎ | 124652/371472 [9:54:51<18:45:37, 3.65it/s] 34%|███▎ | 124653/371472 [9:54:52<18:16:03, 3.75it/s] 34%|███▎ | 124654/371472 [9:54:52<17:57:16, 3.82it/s] 34%|███▎ | 124655/371472 [9:54:52<17:43:35, 3.87it/s] 34%|███▎ | 124656/371472 [9:54:52<17:29:06, 3.92it/s] 34%|███▎ | 124657/371472 [9:54:53<19:41:13, 3.48it/s] 34%|███▎ | 124658/371472 [9:54:53<18:52:05, 3.63it/s] 34%|███▎ | 124659/371472 [9:54:53<18:53:44, 3.63it/s] 34%|███▎ | 124660/371472 [9:54:54<18:27:44, 3.71it/s] {'loss': 3.2923, 'learning_rate': 6.982966665948415e-07, 'epoch': 5.37} + 34%|███▎ | 124660/371472 [9:54:54<18:27:44, 3.71it/s] 34%|███▎ | 124661/371472 [9:54:54<18:43:09, 3.66it/s] 34%|███▎ | 124662/371472 [9:54:54<18:17:12, 3.75it/s] 34%|███▎ | 124663/371472 [9:54:54<18:23:04, 3.73it/s] 34%|███▎ | 124664/371472 [9:54:55<18:48:34, 3.64it/s] 34%|███▎ | 124665/371472 [9:54:55<18:44:14, 3.66it/s] 34%|███▎ | 124666/371472 [9:54:55<18:20:55, 3.74it/s] 34%|███▎ | 124667/371472 [9:54:55<18:12:44, 3.76it/s] 34%|███▎ | 124668/371472 [9:54:56<19:14:01, 3.56it/s] 34%|███▎ | 124669/371472 [9:54:56<19:25:09, 3.53it/s] 34%|███▎ | 124670/371472 [9:54:56<18:48:46, 3.64it/s] 34%|███▎ | 124671/371472 [9:54:57<18:59:43, 3.61it/s] 34%|███▎ | 124672/371472 [9:54:57<18:39:51, 3.67it/s] 34%|███▎ | 124673/371472 [9:54:57<19:11:22, 3.57it/s] 34%|███▎ | 124674/371472 [9:54:57<20:33:25, 3.33it/s] 34%|███▎ | 124675/371472 [9:54:58<21:01:53, 3.26it/s] 34%|███▎ | 124676/371472 [9:54:58<20:04:19, 3.42it/s] 34%|███▎ | 124677/371472 [9:54:58<19:58:50, 3.43it/s] 34%|███▎ | 124678/371472 [9:54:59<19:38:06, 3.49it/s] 34%|███▎ | 124679/371472 [9:54:59<18:49:26, 3.64it/s] 34%|███▎ | 124680/371472 [9:54:59<20:34:09, 3.33it/s] {'loss': 3.3227, 'learning_rate': 6.982481846193626e-07, 'epoch': 5.37} + 34%|███▎ | 124680/371472 [9:54:59<20:34:09, 3.33it/s] 34%|███▎ | 124681/371472 [9:54:59<19:15:30, 3.56it/s] 34%|███▎ | 124682/371472 [9:55:00<18:37:07, 3.68it/s] 34%|███▎ | 124683/371472 [9:55:00<17:53:46, 3.83it/s] 34%|███▎ | 124684/371472 [9:55:00<18:00:00, 3.81it/s] 34%|███▎ | 124685/371472 [9:55:00<18:08:04, 3.78it/s] 34%|███▎ | 124686/371472 [9:55:01<18:43:54, 3.66it/s] 34%|███▎ | 124687/371472 [9:55:01<19:37:50, 3.49it/s] 34%|███▎ | 124688/371472 [9:55:01<19:40:24, 3.48it/s] 34%|███▎ | 124689/371472 [9:55:02<18:55:55, 3.62it/s] 34%|███▎ | 124690/371472 [9:55:02<18:50:17, 3.64it/s] 34%|███▎ | 124691/371472 [9:55:02<19:05:35, 3.59it/s] 34%|███▎ | 124692/371472 [9:55:02<18:49:29, 3.64it/s] 34%|███▎ | 124693/371472 [9:55:03<18:44:58, 3.66it/s] 34%|███▎ | 124694/371472 [9:55:03<19:43:56, 3.47it/s] 34%|███▎ | 124695/371472 [9:55:03<21:20:44, 3.21it/s] 34%|███▎ | 124696/371472 [9:55:04<20:39:35, 3.32it/s] 34%|███▎ | 124697/371472 [9:55:04<19:46:59, 3.47it/s] 34%|███▎ | 124698/371472 [9:55:04<19:56:51, 3.44it/s] 34%|███▎ | 124699/371472 [9:55:04<19:43:37, 3.47it/s] 34%|███▎ | 124700/371472 [9:55:05<19:51:05, 3.45it/s] {'loss': 3.4115, 'learning_rate': 6.981997026438837e-07, 'epoch': 5.37} + 34%|███▎ | 124700/371472 [9:55:05<19:51:05, 3.45it/s] 34%|███▎ | 124701/371472 [9:55:05<19:42:13, 3.48it/s] 34%|███▎ | 124702/371472 [9:55:05<19:58:18, 3.43it/s] 34%|███▎ | 124703/371472 [9:55:06<19:37:24, 3.49it/s] 34%|███▎ | 124704/371472 [9:55:06<19:01:03, 3.60it/s] 34%|███▎ | 124705/371472 [9:55:06<19:32:26, 3.51it/s] 34%|███▎ | 124706/371472 [9:55:06<18:23:38, 3.73it/s] 34%|███▎ | 124707/371472 [9:55:07<17:50:01, 3.84it/s] 34%|███▎ | 124708/371472 [9:55:07<17:51:30, 3.84it/s] 34%|███▎ | 124709/371472 [9:55:07<18:45:55, 3.65it/s] 34%|███▎ | 124710/371472 [9:55:08<18:30:04, 3.70it/s] 34%|███▎ | 124711/371472 [9:55:08<17:53:41, 3.83it/s] 34%|███▎ | 124712/371472 [9:55:08<17:52:34, 3.83it/s] 34%|███▎ | 124713/371472 [9:55:08<18:07:13, 3.78it/s] 34%|███▎ | 124714/371472 [9:55:09<18:06:33, 3.79it/s] 34%|███▎ | 124715/371472 [9:55:09<18:28:05, 3.71it/s] 34%|███▎ | 124716/371472 [9:55:09<18:04:42, 3.79it/s] 34%|███▎ | 124717/371472 [9:55:09<18:34:21, 3.69it/s] 34%|███▎ | 124718/371472 [9:55:10<17:55:26, 3.82it/s] 34%|███▎ | 124719/371472 [9:55:10<17:43:13, 3.87it/s] 34%|███▎ | 124720/371472 [9:55:10<18:10:14, 3.77it/s] {'loss': 3.2173, 'learning_rate': 6.981512206684048e-07, 'epoch': 5.37} + 34%|███▎ | 124720/371472 [9:55:10<18:10:14, 3.77it/s] 34%|███▎ | 124721/371472 [9:55:10<19:33:08, 3.51it/s] 34%|███▎ | 124722/371472 [9:55:11<20:30:21, 3.34it/s] 34%|███▎ | 124723/371472 [9:55:11<19:26:22, 3.53it/s] 34%|███▎ | 124724/371472 [9:55:11<18:46:27, 3.65it/s] 34%|███▎ | 124725/371472 [9:55:12<18:12:08, 3.77it/s] 34%|███▎ | 124726/371472 [9:55:12<18:48:28, 3.64it/s] 34%|███▎ | 124727/371472 [9:55:12<18:33:18, 3.69it/s] 34%|███▎ | 124728/371472 [9:55:12<18:51:28, 3.63it/s] 34%|███▎ | 124729/371472 [9:55:13<18:19:17, 3.74it/s] 34%|███▎ | 124730/371472 [9:55:13<18:03:51, 3.79it/s] 34%|███▎ | 124731/371472 [9:55:13<17:28:16, 3.92it/s] 34%|███▎ | 124732/371472 [9:55:13<17:59:15, 3.81it/s] 34%|███▎ | 124733/371472 [9:55:14<17:31:31, 3.91it/s] 34%|███▎ | 124734/371472 [9:55:14<17:41:54, 3.87it/s] 34%|███▎ | 124735/371472 [9:55:14<17:54:17, 3.83it/s] 34%|███▎ | 124736/371472 [9:55:14<17:46:38, 3.86it/s] 34%|███▎ | 124737/371472 [9:55:15<18:42:29, 3.66it/s] 34%|███▎ | 124738/371472 [9:55:15<18:08:21, 3.78it/s] 34%|███▎ | 124739/371472 [9:55:15<19:10:43, 3.57it/s] 34%|███▎ | 124740/371472 [9:55:16<18:31:50, 3.70it/s] {'loss': 3.2662, 'learning_rate': 6.981027386929259e-07, 'epoch': 5.37} + 34%|███▎ | 124740/371472 [9:55:16<18:31:50, 3.70it/s] 34%|███▎ | 124741/371472 [9:55:16<18:10:17, 3.77it/s] 34%|███▎ | 124742/371472 [9:55:16<18:15:44, 3.75it/s] 34%|███▎ | 124743/371472 [9:55:16<18:20:26, 3.74it/s] 34%|███▎ | 124744/371472 [9:55:17<18:13:40, 3.76it/s] 34%|███▎ | 124745/371472 [9:55:17<18:40:28, 3.67it/s] 34%|███▎ | 124746/371472 [9:55:17<18:04:31, 3.79it/s] 34%|███▎ | 124747/371472 [9:55:17<19:40:55, 3.48it/s] 34%|███▎ | 124748/371472 [9:55:18<19:23:27, 3.53it/s] 34%|███▎ | 124749/371472 [9:55:18<18:56:25, 3.62it/s] 34%|███▎ | 124750/371472 [9:55:18<18:54:28, 3.62it/s] 34%|███▎ | 124751/371472 [9:55:19<19:11:32, 3.57it/s] 34%|███▎ | 124752/371472 [9:55:19<18:30:38, 3.70it/s] 34%|███▎ | 124753/371472 [9:55:19<18:47:18, 3.65it/s] 34%|███▎ | 124754/371472 [9:55:19<18:56:23, 3.62it/s] 34%|███▎ | 124755/371472 [9:55:20<18:43:49, 3.66it/s] 34%|███▎ | 124756/371472 [9:55:20<18:01:31, 3.80it/s] 34%|███▎ | 124757/371472 [9:55:20<17:42:37, 3.87it/s] 34%|███▎ | 124758/371472 [9:55:20<19:41:04, 3.48it/s] 34%|███▎ | 124759/371472 [9:55:21<18:57:55, 3.61it/s] 34%|███▎ | 124760/371472 [9:55:21<18:28:50, 3.71it/s] {'loss': 3.2279, 'learning_rate': 6.98054256717447e-07, 'epoch': 5.37} + 34%|███▎ | 124760/371472 [9:55:21<18:28:50, 3.71it/s] 34%|███▎ | 124761/371472 [9:55:21<18:10:27, 3.77it/s] 34%|███▎ | 124762/371472 [9:55:22<18:28:11, 3.71it/s] 34%|███▎ | 124763/371472 [9:55:22<18:53:53, 3.63it/s] 34%|███▎ | 124764/371472 [9:55:22<19:09:54, 3.58it/s] 34%|███▎ | 124765/371472 [9:55:22<19:26:31, 3.52it/s] 34%|███▎ | 124766/371472 [9:55:23<19:04:21, 3.59it/s] 34%|███▎ | 124767/371472 [9:55:23<19:29:24, 3.52it/s] 34%|███▎ | 124768/371472 [9:55:23<18:53:21, 3.63it/s] 34%|███▎ | 124769/371472 [9:55:23<18:44:12, 3.66it/s] 34%|███▎ | 124770/371472 [9:55:24<18:41:52, 3.67it/s] 34%|███▎ | 124771/371472 [9:55:24<17:59:06, 3.81it/s] 34%|███▎ | 124772/371472 [9:55:24<18:00:52, 3.80it/s] 34%|███▎ | 124773/371472 [9:55:25<17:42:11, 3.87it/s] 34%|███▎ | 124774/371472 [9:55:25<19:08:58, 3.58it/s] 34%|███▎ | 124775/371472 [9:55:25<18:43:35, 3.66it/s] 34%|███▎ | 124776/371472 [9:55:25<18:21:38, 3.73it/s] 34%|███▎ | 124777/371472 [9:55:26<18:53:35, 3.63it/s] 34%|███▎ | 124778/371472 [9:55:26<19:05:13, 3.59it/s] 34%|███▎ | 124779/371472 [9:55:26<18:46:50, 3.65it/s] 34%|███▎ | 124780/371472 [9:55:27<19:33:38, 3.50it/s] {'loss': 3.2805, 'learning_rate': 6.980057747419682e-07, 'epoch': 5.37} + 34%|███▎ | 124780/371472 [9:55:27<19:33:38, 3.50it/s] 34%|███▎ | 124781/371472 [9:55:27<19:44:12, 3.47it/s] 34%|███▎ | 124782/371472 [9:55:27<19:27:32, 3.52it/s] 34%|███▎ | 124783/371472 [9:55:27<22:01:35, 3.11it/s] 34%|███▎ | 124784/371472 [9:55:28<20:25:25, 3.36it/s] 34%|███▎ | 124785/371472 [9:55:28<19:41:01, 3.48it/s] 34%|███▎ | 124786/371472 [9:55:28<21:02:01, 3.26it/s] 34%|███▎ | 124787/371472 [9:55:29<20:58:02, 3.27it/s] 34%|███▎ | 124788/371472 [9:55:29<20:47:35, 3.30it/s] 34%|███▎ | 124789/371472 [9:55:29<21:37:29, 3.17it/s] 34%|███▎ | 124790/371472 [9:55:30<20:21:44, 3.37it/s] 34%|███▎ | 124791/371472 [9:55:30<19:12:27, 3.57it/s] 34%|███▎ | 124792/371472 [9:55:30<18:46:15, 3.65it/s] 34%|███▎ | 124793/371472 [9:55:30<18:55:54, 3.62it/s] 34%|███▎ | 124794/371472 [9:55:31<20:15:11, 3.38it/s] 34%|███▎ | 124795/371472 [9:55:31<20:01:43, 3.42it/s] 34%|███▎ | 124796/371472 [9:55:31<20:10:50, 3.40it/s] 34%|███▎ | 124797/371472 [9:55:32<22:59:03, 2.98it/s] 34%|███▎ | 124798/371472 [9:55:32<22:50:26, 3.00it/s] 34%|███▎ | 124799/371472 [9:55:32<21:22:01, 3.21it/s] 34%|███▎ | 124800/371472 [9:55:33<20:33:09, 3.33it/s] {'loss': 3.3974, 'learning_rate': 6.979572927664893e-07, 'epoch': 5.38} + 34%|███▎ | 124800/371472 [9:55:33<20:33:09, 3.33it/s] 34%|███▎ | 124801/371472 [9:55:33<19:34:35, 3.50it/s] 34%|███▎ | 124802/371472 [9:55:33<18:29:15, 3.71it/s] 34%|███▎ | 124803/371472 [9:55:33<21:29:07, 3.19it/s] 34%|███▎ | 124804/371472 [9:55:34<21:06:52, 3.25it/s] 34%|███▎ | 124805/371472 [9:55:34<20:23:54, 3.36it/s] 34%|███▎ | 124806/371472 [9:55:34<21:04:26, 3.25it/s] 34%|███▎ | 124807/371472 [9:55:35<21:16:29, 3.22it/s] 34%|███▎ | 124808/371472 [9:55:35<21:05:25, 3.25it/s] 34%|███▎ | 124809/371472 [9:55:35<19:49:17, 3.46it/s] 34%|███▎ | 124810/371472 [9:55:35<18:52:11, 3.63it/s] 34%|███▎ | 124811/371472 [9:55:36<18:42:41, 3.66it/s] 34%|███▎ | 124812/371472 [9:55:36<18:21:21, 3.73it/s] 34%|███▎ | 124813/371472 [9:55:36<18:33:50, 3.69it/s] 34%|███▎ | 124814/371472 [9:55:36<17:46:40, 3.85it/s] 34%|███▎ | 124815/371472 [9:55:37<17:46:57, 3.85it/s] 34%|███▎ | 124816/371472 [9:55:37<19:02:20, 3.60it/s] 34%|███▎ | 124817/371472 [9:55:37<19:52:12, 3.45it/s] 34%|███▎ | 124818/371472 [9:55:38<19:19:59, 3.54it/s] 34%|███▎ | 124819/371472 [9:55:38<19:46:42, 3.46it/s] 34%|███▎ | 124820/371472 [9:55:38<20:17:21, 3.38it/s] {'loss': 3.2605, 'learning_rate': 6.979088107910103e-07, 'epoch': 5.38} + 34%|███▎ | 124820/371472 [9:55:38<20:17:21, 3.38it/s] 34%|███▎ | 124821/371472 [9:55:39<19:56:13, 3.44it/s] 34%|███▎ | 124822/371472 [9:55:39<19:11:48, 3.57it/s] 34%|███▎ | 124823/371472 [9:55:39<18:43:27, 3.66it/s] 34%|███▎ | 124824/371472 [9:55:39<18:53:57, 3.63it/s] 34%|███▎ | 124825/371472 [9:55:40<18:37:00, 3.68it/s] 34%|███▎ | 124826/371472 [9:55:40<18:36:57, 3.68it/s] 34%|███▎ | 124827/371472 [9:55:40<18:26:57, 3.71it/s] 34%|███▎ | 124828/371472 [9:55:40<18:51:24, 3.63it/s] 34%|███▎ | 124829/371472 [9:55:41<19:01:23, 3.60it/s] 34%|███▎ | 124830/371472 [9:55:41<18:16:27, 3.75it/s] 34%|███▎ | 124831/371472 [9:55:41<17:58:23, 3.81it/s] 34%|███▎ | 124832/371472 [9:55:42<18:47:22, 3.65it/s] 34%|███▎ | 124833/371472 [9:55:42<19:26:31, 3.52it/s] 34%|███▎ | 124834/371472 [9:55:42<19:31:51, 3.51it/s] 34%|███▎ | 124835/371472 [9:55:42<20:25:39, 3.35it/s] 34%|███▎ | 124836/371472 [9:55:43<19:32:17, 3.51it/s] 34%|███▎ | 124837/371472 [9:55:43<18:43:06, 3.66it/s] 34%|███▎ | 124838/371472 [9:55:43<17:56:22, 3.82it/s] 34%|███▎ | 124839/371472 [9:55:44<23:17:06, 2.94it/s] 34%|███▎ | 124840/371472 [9:55:44<21:57:51, 3.12it/s] {'loss': 3.2753, 'learning_rate': 6.978603288155314e-07, 'epoch': 5.38} + 34%|███▎ | 124840/371472 [9:55:44<21:57:51, 3.12it/s] 34%|███▎ | 124841/371472 [9:55:44<20:04:08, 3.41it/s] 34%|███▎ | 124842/371472 [9:55:45<20:43:09, 3.31it/s] 34%|███▎ | 124843/371472 [9:55:45<21:03:25, 3.25it/s] 34%|███▎ | 124844/371472 [9:55:45<19:59:11, 3.43it/s] 34%|███▎ | 124845/371472 [9:55:45<19:28:21, 3.52it/s] 34%|███▎ | 124846/371472 [9:55:46<19:04:29, 3.59it/s] 34%|███▎ | 124847/371472 [9:55:46<18:49:09, 3.64it/s] 34%|███▎ | 124848/371472 [9:55:46<19:05:01, 3.59it/s] 34%|███▎ | 124849/371472 [9:55:46<19:51:27, 3.45it/s] 34%|███▎ | 124850/371472 [9:55:47<19:24:01, 3.53it/s] 34%|███▎ | 124851/371472 [9:55:47<18:39:27, 3.67it/s] 34%|███▎ | 124852/371472 [9:55:47<18:08:24, 3.78it/s] 34%|███▎ | 124853/371472 [9:55:48<18:26:01, 3.72it/s] 34%|███▎ | 124854/371472 [9:55:48<18:24:55, 3.72it/s] 34%|███▎ | 124855/371472 [9:55:48<19:08:07, 3.58it/s] 34%|███▎ | 124856/371472 [9:55:48<20:09:32, 3.40it/s] 34%|███▎ | 124857/371472 [9:55:49<19:07:18, 3.58it/s] 34%|███▎ | 124858/371472 [9:55:49<19:19:13, 3.55it/s] 34%|███▎ | 124859/371472 [9:55:49<19:13:56, 3.56it/s] 34%|███▎ | 124860/371472 [9:55:49<18:42:57, 3.66it/s] {'loss': 3.3599, 'learning_rate': 6.978118468400525e-07, 'epoch': 5.38} + 34%|███▎ | 124860/371472 [9:55:49<18:42:57, 3.66it/s] 34%|███▎ | 124861/371472 [9:55:50<18:45:07, 3.65it/s] 34%|███▎ | 124862/371472 [9:55:50<18:08:40, 3.78it/s] 34%|███▎ | 124863/371472 [9:55:50<18:07:28, 3.78it/s] 34%|███▎ | 124864/371472 [9:55:51<17:52:58, 3.83it/s] 34%|███▎ | 124865/371472 [9:55:51<18:44:31, 3.65it/s] 34%|███▎ | 124866/371472 [9:55:51<19:10:05, 3.57it/s] 34%|███▎ | 124867/371472 [9:55:51<18:38:42, 3.67it/s] 34%|███▎ | 124868/371472 [9:55:52<19:39:56, 3.48it/s] 34%|███▎ | 124869/371472 [9:55:52<19:30:36, 3.51it/s] 34%|███▎ | 124870/371472 [9:55:52<19:15:46, 3.56it/s] 34%|███▎ | 124871/371472 [9:55:53<18:42:34, 3.66it/s] 34%|███▎ | 124872/371472 [9:55:53<17:54:33, 3.82it/s] 34%|███▎ | 124873/371472 [9:55:53<19:02:04, 3.60it/s] 34%|███▎ | 124874/371472 [9:55:53<19:42:28, 3.48it/s] 34%|███▎ | 124875/371472 [9:55:54<19:41:44, 3.48it/s] 34%|███▎ | 124876/371472 [9:55:54<19:46:16, 3.46it/s] 34%|███▎ | 124877/371472 [9:55:54<21:01:31, 3.26it/s] 34%|███▎ | 124878/371472 [9:55:55<20:25:19, 3.35it/s] 34%|███▎ | 124879/371472 [9:55:55<20:11:04, 3.39it/s] 34%|███▎ | 124880/371472 [9:55:55<19:29:00, 3.52it/s] {'loss': 3.2229, 'learning_rate': 6.977633648645737e-07, 'epoch': 5.38} + 34%|███▎ | 124880/371472 [9:55:55<19:29:00, 3.52it/s] 34%|███▎ | 124881/371472 [9:55:55<19:08:09, 3.58it/s] 34%|███▎ | 124882/371472 [9:55:56<19:13:46, 3.56it/s] 34%|███▎ | 124883/371472 [9:55:56<19:08:30, 3.58it/s] 34%|███▎ | 124884/371472 [9:55:56<19:58:11, 3.43it/s] 34%|███▎ | 124885/371472 [9:55:57<19:30:34, 3.51it/s] 34%|███▎ | 124886/371472 [9:55:57<21:33:34, 3.18it/s] 34%|███▎ | 124887/371472 [9:55:57<20:27:59, 3.35it/s] 34%|███▎ | 124888/371472 [9:55:58<21:28:19, 3.19it/s] 34%|███▎ | 124889/371472 [9:55:58<20:06:47, 3.41it/s] 34%|███▎ | 124890/371472 [9:55:58<19:43:09, 3.47it/s] 34%|███▎ | 124891/371472 [9:55:58<19:06:59, 3.58it/s] 34%|███▎ | 124892/371472 [9:55:59<20:40:10, 3.31it/s] 34%|███▎ | 124893/371472 [9:55:59<19:47:17, 3.46it/s] 34%|███▎ | 124894/371472 [9:55:59<19:31:38, 3.51it/s] 34%|███▎ | 124895/371472 [9:56:00<20:14:35, 3.38it/s] 34%|███▎ | 124896/371472 [9:56:00<19:25:44, 3.53it/s] 34%|███▎ | 124897/371472 [9:56:00<19:28:01, 3.52it/s] 34%|███▎ | 124898/371472 [9:56:00<19:44:09, 3.47it/s] 34%|███▎ | 124899/371472 [9:56:01<19:02:40, 3.60it/s] 34%|███▎ | 124900/371472 [9:56:01<18:32:58, 3.69it/s] {'loss': 3.1242, 'learning_rate': 6.977148828890948e-07, 'epoch': 5.38} + 34%|███▎ | 124900/371472 [9:56:01<18:32:58, 3.69it/s] 34%|███▎ | 124901/371472 [9:56:01<18:10:24, 3.77it/s] 34%|███▎ | 124902/371472 [9:56:01<19:40:05, 3.48it/s] 34%|███▎ | 124903/371472 [9:56:02<20:14:59, 3.38it/s] 34%|███▎ | 124904/371472 [9:56:02<20:32:33, 3.33it/s] 34%|███▎ | 124905/371472 [9:56:02<19:38:54, 3.49it/s] 34%|███▎ | 124906/371472 [9:56:03<20:06:18, 3.41it/s] 34%|███▎ | 124907/371472 [9:56:03<19:39:50, 3.48it/s] 34%|███▎ | 124908/371472 [9:56:03<18:27:04, 3.71it/s] 34%|███▎ | 124909/371472 [9:56:03<17:52:06, 3.83it/s] 34%|███▎ | 124910/371472 [9:56:04<17:34:46, 3.90it/s] 34%|███▎ | 124911/371472 [9:56:04<18:15:18, 3.75it/s] 34%|███▎ | 124912/371472 [9:56:04<18:05:08, 3.79it/s] 34%|███▎ | 124913/371472 [9:56:04<18:42:34, 3.66it/s] 34%|███▎ | 124914/371472 [9:56:05<17:58:46, 3.81it/s] 34%|███▎ | 124915/371472 [9:56:05<19:26:14, 3.52it/s] 34%|███▎ | 124916/371472 [9:56:05<19:07:04, 3.58it/s] 34%|███▎ | 124917/371472 [9:56:06<20:43:45, 3.30it/s] 34%|███▎ | 124918/371472 [9:56:06<19:21:37, 3.54it/s] 34%|███▎ | 124919/371472 [9:56:06<18:37:38, 3.68it/s] 34%|███▎ | 124920/371472 [9:56:06<18:32:54, 3.69it/s] {'loss': 3.1544, 'learning_rate': 6.976664009136159e-07, 'epoch': 5.38} + 34%|███▎ | 124920/371472 [9:56:06<18:32:54, 3.69it/s] 34%|███▎ | 124921/371472 [9:56:07<18:51:06, 3.63it/s] 34%|███▎ | 124922/371472 [9:56:07<20:01:47, 3.42it/s] 34%|███▎ | 124923/371472 [9:56:07<20:32:29, 3.33it/s] 34%|███▎ | 124924/371472 [9:56:08<19:18:55, 3.55it/s] 34%|███▎ | 124925/371472 [9:56:08<19:24:04, 3.53it/s] 34%|███▎ | 124926/371472 [9:56:08<20:58:27, 3.27it/s] 34%|███▎ | 124927/371472 [9:56:09<20:01:46, 3.42it/s] 34%|███▎ | 124928/371472 [9:56:09<19:15:57, 3.55it/s] 34%|███▎ | 124929/371472 [9:56:09<19:05:31, 3.59it/s] 34%|███▎ | 124930/371472 [9:56:09<19:15:35, 3.56it/s] 34%|███▎ | 124931/371472 [9:56:10<20:05:57, 3.41it/s] 34%|███▎ | 124932/371472 [9:56:10<19:30:14, 3.51it/s] 34%|███▎ | 124933/371472 [9:56:10<19:32:41, 3.50it/s] 34%|███▎ | 124934/371472 [9:56:11<19:50:57, 3.45it/s] 34%|███▎ | 124935/371472 [9:56:11<19:47:36, 3.46it/s] 34%|███▎ | 124936/371472 [9:56:11<19:07:59, 3.58it/s] 34%|███▎ | 124937/371472 [9:56:11<18:47:44, 3.64it/s] 34%|███▎ | 124938/371472 [9:56:12<18:46:42, 3.65it/s] 34%|███▎ | 124939/371472 [9:56:12<18:42:28, 3.66it/s] 34%|███▎ | 124940/371472 [9:56:12<18:23:29, 3.72it/s] {'loss': 3.1571, 'learning_rate': 6.97617918938137e-07, 'epoch': 5.38} + 34%|███▎ | 124940/371472 [9:56:12<18:23:29, 3.72it/s] 34%|███▎ | 124941/371472 [9:56:12<18:35:02, 3.68it/s] 34%|███▎ | 124942/371472 [9:56:13<17:46:39, 3.85it/s] 34%|███▎ | 124943/371472 [9:56:13<17:57:46, 3.81it/s] 34%|███▎ | 124944/371472 [9:56:13<17:56:55, 3.82it/s] 34%|███▎ | 124945/371472 [9:56:13<17:49:21, 3.84it/s] 34%|███▎ | 124946/371472 [9:56:14<17:58:36, 3.81it/s] 34%|███▎ | 124947/371472 [9:56:14<18:22:22, 3.73it/s] 34%|███▎ | 124948/371472 [9:56:14<18:12:02, 3.76it/s] 34%|███▎ | 124949/371472 [9:56:14<17:51:27, 3.83it/s] 34%|███▎ | 124950/371472 [9:56:15<18:58:35, 3.61it/s] 34%|███▎ | 124951/371472 [9:56:15<18:22:16, 3.73it/s] 34%|███▎ | 124952/371472 [9:56:15<18:34:05, 3.69it/s] 34%|███▎ | 124953/371472 [9:56:16<19:50:54, 3.45it/s] 34%|███▎ | 124954/371472 [9:56:16<20:06:33, 3.41it/s] 34%|███▎ | 124955/371472 [9:56:16<20:20:29, 3.37it/s] 34%|███▎ | 124956/371472 [9:56:16<19:12:48, 3.56it/s] 34%|███▎ | 124957/371472 [9:56:17<18:45:36, 3.65it/s] 34%|███▎ | 124958/371472 [9:56:17<18:36:28, 3.68it/s] 34%|███▎ | 124959/371472 [9:56:17<18:19:24, 3.74it/s] 34%|███▎ | 124960/371472 [9:56:18<18:25:40, 3.72it/s] {'loss': 3.1765, 'learning_rate': 6.97569436962658e-07, 'epoch': 5.38} + 34%|███▎ | 124960/371472 [9:56:18<18:25:40, 3.72it/s] 34%|███▎ | 124961/371472 [9:56:18<19:15:24, 3.56it/s] 34%|███▎ | 124962/371472 [9:56:18<19:11:08, 3.57it/s] 34%|███▎ | 124963/371472 [9:56:18<19:22:09, 3.54it/s] 34%|███▎ | 124964/371472 [9:56:19<19:00:24, 3.60it/s] 34%|███▎ | 124965/371472 [9:56:19<18:27:16, 3.71it/s] 34%|███▎ | 124966/371472 [9:56:19<18:30:08, 3.70it/s] 34%|███▎ | 124967/371472 [9:56:19<18:05:29, 3.78it/s] 34%|███▎ | 124968/371472 [9:56:20<18:28:12, 3.71it/s] 34%|███▎ | 124969/371472 [9:56:20<18:01:14, 3.80it/s] 34%|███▎ | 124970/371472 [9:56:20<19:07:27, 3.58it/s] 34%|███▎ | 124971/371472 [9:56:21<18:55:40, 3.62it/s] 34%|███▎ | 124972/371472 [9:56:21<17:55:17, 3.82it/s] 34%|███▎ | 124973/371472 [9:56:21<17:59:19, 3.81it/s] 34%|███▎ | 124974/371472 [9:56:21<17:48:25, 3.85it/s] 34%|███▎ | 124975/371472 [9:56:22<17:26:29, 3.93it/s] 34%|███▎ | 124976/371472 [9:56:22<17:54:43, 3.82it/s] 34%|███▎ | 124977/371472 [9:56:22<18:26:19, 3.71it/s] 34%|███▎ | 124978/371472 [9:56:22<18:27:16, 3.71it/s] 34%|███▎ | 124979/371472 [9:56:23<19:04:12, 3.59it/s] 34%|███▎ | 124980/371472 [9:56:23<19:19:05, 3.54it/s] {'loss': 3.2518, 'learning_rate': 6.975209549871791e-07, 'epoch': 5.38} + 34%|███▎ | 124980/371472 [9:56:23<19:19:05, 3.54it/s] 34%|███▎ | 124981/371472 [9:56:23<19:20:08, 3.54it/s] 34%|███▎ | 124982/371472 [9:56:24<18:59:39, 3.60it/s] 34%|███▎ | 124983/371472 [9:56:24<19:14:35, 3.56it/s] 34%|███▎ | 124984/371472 [9:56:24<18:12:38, 3.76it/s] 34%|███▎ | 124985/371472 [9:56:24<20:04:05, 3.41it/s] 34%|███▎ | 124986/371472 [9:56:25<20:03:04, 3.41it/s] 34%|███▎ | 124987/371472 [9:56:25<19:28:11, 3.52it/s] 34%|███▎ | 124988/371472 [9:56:25<20:09:57, 3.40it/s] 34%|███▎ | 124989/371472 [9:56:26<19:30:25, 3.51it/s] 34%|███▎ | 124990/371472 [9:56:26<19:59:55, 3.42it/s] 34%|███▎ | 124991/371472 [9:56:26<21:02:08, 3.25it/s] 34%|███▎ | 124992/371472 [9:56:26<20:29:09, 3.34it/s] 34%|███▎ | 124993/371472 [9:56:27<19:37:59, 3.49it/s] 34%|███▎ | 124994/371472 [9:56:27<19:19:43, 3.54it/s] 34%|███▎ | 124995/371472 [9:56:27<18:49:22, 3.64it/s] 34%|███▎ | 124996/371472 [9:56:28<18:37:16, 3.68it/s] 34%|███▎ | 124997/371472 [9:56:28<17:49:19, 3.84it/s] 34%|███▎ | 124998/371472 [9:56:28<18:00:54, 3.80it/s] 34%|███▎ | 124999/371472 [9:56:28<17:47:30, 3.85it/s] 34%|███▎ | 125000/371472 [9:56:29<19:45:13, 3.47it/s] {'loss': 3.2307, 'learning_rate': 6.974724730117004e-07, 'epoch': 5.38} + 34%|███▎ | 125000/371472 [9:56:29<19:45:13, 3.47it/s] 34%|███▎ | 125001/371472 [9:56:29<19:03:17, 3.59it/s] 34%|███▎ | 125002/371472 [9:56:29<18:28:37, 3.71it/s] 34%|███▎ | 125003/371472 [9:56:29<18:50:23, 3.63it/s] 34%|███▎ | 125004/371472 [9:56:30<19:11:08, 3.57it/s] 34%|███▎ | 125005/371472 [9:56:30<19:33:40, 3.50it/s] 34%|███▎ | 125006/371472 [9:56:30<19:33:48, 3.50it/s] 34%|███▎ | 125007/371472 [9:56:31<20:16:37, 3.38it/s] 34%|███▎ | 125008/371472 [9:56:31<19:13:31, 3.56it/s] 34%|███▎ | 125009/371472 [9:56:31<19:37:12, 3.49it/s] 34%|███▎ | 125010/371472 [9:56:31<19:53:58, 3.44it/s] 34%|███▎ | 125011/371472 [9:56:32<19:29:18, 3.51it/s] 34%|███▎ | 125012/371472 [9:56:32<20:13:37, 3.38it/s] 34%|███▎ | 125013/371472 [9:56:32<19:08:37, 3.58it/s] 34%|███▎ | 125014/371472 [9:56:33<20:10:08, 3.39it/s] 34%|███▎ | 125015/371472 [9:56:33<21:40:47, 3.16it/s] 34%|███▎ | 125016/371472 [9:56:33<20:41:56, 3.31it/s] 34%|███▎ | 125017/371472 [9:56:34<19:28:14, 3.52it/s] 34%|███▎ | 125018/371472 [9:56:34<18:47:04, 3.64it/s] 34%|███▎ | 125019/371472 [9:56:34<20:52:54, 3.28it/s] 34%|███▎ | 125020/371472 [9:56:34<19:50:12, 3.45it/s] {'loss': 3.3902, 'learning_rate': 6.974239910362214e-07, 'epoch': 5.38} + 34%|███▎ | 125020/371472 [9:56:34<19:50:12, 3.45it/s] 34%|███▎ | 125021/371472 [9:56:35<19:43:05, 3.47it/s] 34%|███▎ | 125022/371472 [9:56:35<19:01:49, 3.60it/s] 34%|███▎ | 125023/371472 [9:56:35<19:03:47, 3.59it/s] 34%|███▎ | 125024/371472 [9:56:36<19:17:49, 3.55it/s] 34%|███▎ | 125025/371472 [9:56:36<19:46:15, 3.46it/s] 34%|███▎ | 125026/371472 [9:56:36<18:43:50, 3.65it/s] 34%|███▎ | 125027/371472 [9:56:36<18:49:23, 3.64it/s] 34%|███▎ | 125028/371472 [9:56:37<18:59:57, 3.60it/s] 34%|███▎ | 125029/371472 [9:56:37<18:43:02, 3.66it/s] 34%|███▎ | 125030/371472 [9:56:37<18:33:16, 3.69it/s] 34%|███▎ | 125031/371472 [9:56:37<17:53:51, 3.82it/s] 34%|███▎ | 125032/371472 [9:56:38<17:43:51, 3.86it/s] 34%|███▎ | 125033/371472 [9:56:38<17:38:55, 3.88it/s] 34%|███▎ | 125034/371472 [9:56:38<18:06:19, 3.78it/s] 34%|███▎ | 125035/371472 [9:56:38<18:27:42, 3.71it/s] 34%|███▎ | 125036/371472 [9:56:39<18:45:52, 3.65it/s] 34%|███▎ | 125037/371472 [9:56:39<18:18:23, 3.74it/s] 34%|███▎ | 125038/371472 [9:56:39<17:52:48, 3.83it/s] 34%|███▎ | 125039/371472 [9:56:40<18:13:47, 3.76it/s] 34%|███▎ | 125040/371472 [9:56:40<18:17:48, 3.74it/s] {'loss': 3.1286, 'learning_rate': 6.973755090607425e-07, 'epoch': 5.39} + 34%|███▎ | 125040/371472 [9:56:40<18:17:48, 3.74it/s] 34%|███▎ | 125041/371472 [9:56:40<20:03:28, 3.41it/s] 34%|███▎ | 125042/371472 [9:56:40<20:38:41, 3.32it/s] 34%|███▎ | 125043/371472 [9:56:41<21:13:55, 3.22it/s] 34%|███▎ | 125044/371472 [9:56:41<21:02:20, 3.25it/s] 34%|███▎ | 125045/371472 [9:56:41<19:58:54, 3.43it/s] 34%|███▎ | 125046/371472 [9:56:42<19:18:56, 3.54it/s] 34%|███▎ | 125047/371472 [9:56:42<18:54:38, 3.62it/s] 34%|███▎ | 125048/371472 [9:56:42<19:21:10, 3.54it/s] 34%|███▎ | 125049/371472 [9:56:43<20:20:49, 3.36it/s] 34%|███▎ | 125050/371472 [9:56:43<19:39:28, 3.48it/s] 34%|███▎ | 125051/371472 [9:56:43<19:38:04, 3.49it/s] 34%|███▎ | 125052/371472 [9:56:43<19:24:10, 3.53it/s] 34%|███▎ | 125053/371472 [9:56:44<19:08:52, 3.57it/s] 34%|███▎ | 125054/371472 [9:56:44<19:02:45, 3.59it/s] 34%|███▎ | 125055/371472 [9:56:44<20:53:31, 3.28it/s] 34%|███▎ | 125056/371472 [9:56:45<20:10:46, 3.39it/s] 34%|███▎ | 125057/371472 [9:56:45<20:24:45, 3.35it/s] 34%|███▎ | 125058/371472 [9:56:45<19:06:00, 3.58it/s] 34%|███▎ | 125059/371472 [9:56:45<18:30:22, 3.70it/s] 34%|███▎ | 125060/371472 [9:56:46<18:48:28, 3.64it/s] {'loss': 3.1218, 'learning_rate': 6.973270270852636e-07, 'epoch': 5.39} + 34%|███▎ | 125060/371472 [9:56:46<18:48:28, 3.64it/s] 34%|███▎ | 125061/371472 [9:56:46<18:43:23, 3.66it/s] 34%|███▎ | 125062/371472 [9:56:46<18:57:37, 3.61it/s] 34%|███▎ | 125063/371472 [9:56:46<20:10:06, 3.39it/s] 34%|███▎ | 125064/371472 [9:56:47<19:48:26, 3.46it/s] 34%|███▎ | 125065/371472 [9:56:47<19:09:11, 3.57it/s] 34%|█���█▎ | 125066/371472 [9:56:47<18:45:06, 3.65it/s] 34%|███▎ | 125067/371472 [9:56:48<18:46:21, 3.65it/s] 34%|███▎ | 125068/371472 [9:56:48<18:32:16, 3.69it/s] 34%|███▎ | 125069/371472 [9:56:48<17:56:24, 3.82it/s] 34%|███▎ | 125070/371472 [9:56:48<18:19:15, 3.74it/s] 34%|███▎ | 125071/371472 [9:56:49<18:00:41, 3.80it/s] 34%|███▎ | 125072/371472 [9:56:49<17:47:14, 3.85it/s] 34%|███▎ | 125073/371472 [9:56:49<18:00:20, 3.80it/s] 34%|███▎ | 125074/371472 [9:56:49<19:42:52, 3.47it/s] 34%|███▎ | 125075/371472 [9:56:50<19:38:03, 3.49it/s] 34%|███▎ | 125076/371472 [9:56:50<18:48:01, 3.64it/s] 34%|███▎ | 125077/371472 [9:56:50<18:01:25, 3.80it/s] 34%|███▎ | 125078/371472 [9:56:50<18:00:19, 3.80it/s] 34%|███▎ | 125079/371472 [9:56:51<18:33:48, 3.69it/s] 34%|███▎ | 125080/371472 [9:56:51<18:47:17, 3.64it/s] {'loss': 3.0916, 'learning_rate': 6.972785451097847e-07, 'epoch': 5.39} + 34%|███▎ | 125080/371472 [9:56:51<18:47:17, 3.64it/s] 34%|███▎ | 125081/371472 [9:56:51<18:23:22, 3.72it/s] 34%|███▎ | 125082/371472 [9:56:52<19:10:44, 3.57it/s] 34%|███▎ | 125083/371472 [9:56:52<18:55:02, 3.62it/s] 34%|███▎ | 125084/371472 [9:56:52<20:13:36, 3.38it/s] 34%|███▎ | 125085/371472 [9:56:52<19:31:35, 3.51it/s] 34%|███▎ | 125086/371472 [9:56:53<18:41:37, 3.66it/s] 34%|███▎ | 125087/371472 [9:56:53<20:13:54, 3.38it/s] 34%|███▎ | 125088/371472 [9:56:53<20:10:06, 3.39it/s] 34%|███▎ | 125089/371472 [9:56:54<20:22:58, 3.36it/s] 34%|███▎ | 125090/371472 [9:56:54<20:49:42, 3.29it/s] 34%|███▎ | 125091/371472 [9:56:54<21:17:13, 3.22it/s] 34%|███▎ | 125092/371472 [9:56:55<22:35:42, 3.03it/s] 34%|███▎ | 125093/371472 [9:56:55<21:12:53, 3.23it/s] 34%|███▎ | 125094/371472 [9:56:55<19:49:11, 3.45it/s] 34%|███▎ | 125095/371472 [9:56:55<19:47:50, 3.46it/s] 34%|███▎ | 125096/371472 [9:56:56<19:16:07, 3.55it/s] 34%|███▎ | 125097/371472 [9:56:56<19:06:56, 3.58it/s] 34%|███▎ | 125098/371472 [9:56:56<19:34:31, 3.50it/s] 34%|███▎ | 125099/371472 [9:56:57<20:38:25, 3.32it/s] 34%|███▎ | 125100/371472 [9:56:57<22:36:26, 3.03it/s] {'loss': 3.2104, 'learning_rate': 6.972300631343058e-07, 'epoch': 5.39} + 34%|███▎ | 125100/371472 [9:56:57<22:36:26, 3.03it/s] 34%|███▎ | 125101/371472 [9:56:57<21:59:00, 3.11it/s] 34%|███▎ | 125102/371472 [9:56:58<21:50:48, 3.13it/s] 34%|███▎ | 125103/371472 [9:56:58<22:52:29, 2.99it/s] 34%|███▎ | 125104/371472 [9:56:58<23:18:23, 2.94it/s] 34%|███▎ | 125105/371472 [9:56:59<22:08:34, 3.09it/s] 34%|███▎ | 125106/371472 [9:56:59<22:01:24, 3.11it/s] 34%|███▎ | 125107/371472 [9:56:59<21:27:17, 3.19it/s] 34%|███▎ | 125108/371472 [9:57:00<20:22:56, 3.36it/s] 34%|███▎ | 125109/371472 [9:57:00<19:25:29, 3.52it/s] 34%|███▎ | 125110/371472 [9:57:00<18:34:54, 3.68it/s] 34%|███▎ | 125111/371472 [9:57:00<18:58:24, 3.61it/s] 34%|███▎ | 125112/371472 [9:57:01<19:18:25, 3.54it/s] 34%|███▎ | 125113/371472 [9:57:01<20:37:14, 3.32it/s] 34%|███▎ | 125114/371472 [9:57:01<20:27:06, 3.35it/s] 34%|███▎ | 125115/371472 [9:57:02<20:11:08, 3.39it/s] 34%|███▎ | 125116/371472 [9:57:02<19:43:23, 3.47it/s] 34%|███▎ | 125117/371472 [9:57:02<19:54:02, 3.44it/s] 34%|███▎ | 125118/371472 [9:57:02<19:47:37, 3.46it/s] 34%|███▎ | 125119/371472 [9:57:03<19:43:13, 3.47it/s] 34%|███▎ | 125120/371472 [9:57:03<19:23:00, 3.53it/s] {'loss': 3.2597, 'learning_rate': 6.971815811588269e-07, 'epoch': 5.39} + 34%|███▎ | 125120/371472 [9:57:03<19:23:00, 3.53it/s] 34%|███▎ | 125121/371472 [9:57:03<19:14:44, 3.56it/s] 34%|███▎ | 125122/371472 [9:57:04<19:08:34, 3.57it/s] 34%|███▎ | 125123/371472 [9:57:04<18:49:32, 3.63it/s] 34%|███▎ | 125124/371472 [9:57:04<18:04:55, 3.78it/s] 34%|███▎ | 125125/371472 [9:57:04<18:54:35, 3.62it/s] 34%|███▎ | 125126/371472 [9:57:05<18:17:12, 3.74it/s] 34%|███▎ | 125127/371472 [9:57:05<18:34:40, 3.68it/s] 34%|███▎ | 125128/371472 [9:57:05<18:45:14, 3.65it/s] 34%|███▎ | 125129/371472 [9:57:05<18:11:25, 3.76it/s] 34%|███▎ | 125130/371472 [9:57:06<17:46:24, 3.85it/s] 34%|███▎ | 125131/371472 [9:57:06<18:28:02, 3.71it/s] 34%|███▎ | 125132/371472 [9:57:06<17:56:43, 3.81it/s] 34%|███▎ | 125133/371472 [9:57:06<17:46:40, 3.85it/s] 34%|███▎ | 125134/371472 [9:57:07<17:38:30, 3.88it/s] 34%|███▎ | 125135/371472 [9:57:07<18:14:52, 3.75it/s] 34%|███▎ | 125136/371472 [9:57:07<19:39:43, 3.48it/s] 34%|███▎ | 125137/371472 [9:57:08<19:31:18, 3.51it/s] 34%|███▎ | 125138/371472 [9:57:08<19:24:18, 3.53it/s] 34%|███▎ | 125139/371472 [9:57:08<19:08:46, 3.57it/s] 34%|███▎ | 125140/371472 [9:57:08<19:35:07, 3.49it/s] {'loss': 3.2439, 'learning_rate': 6.97133099183348e-07, 'epoch': 5.39} + 34%|███▎ | 125140/371472 [9:57:08<19:35:07, 3.49it/s] 34%|███▎ | 125141/371472 [9:57:09<20:33:27, 3.33it/s] 34%|███▎ | 125142/371472 [9:57:09<19:30:01, 3.51it/s] 34%|███▎ | 125143/371472 [9:57:09<19:20:34, 3.54it/s] 34%|███▎ | 125144/371472 [9:57:10<18:27:27, 3.71it/s] 34%|███▎ | 125145/371472 [9:57:10<19:36:01, 3.49it/s] 34%|███▎ | 125146/371472 [9:57:10<18:46:00, 3.65it/s] 34%|███▎ | 125147/371472 [9:57:10<18:54:23, 3.62it/s] 34%|███▎ | 125148/371472 [9:57:11<18:28:48, 3.70it/s] 34%|███▎ | 125149/371472 [9:57:11<17:40:13, 3.87it/s] 34%|███▎ | 125150/371472 [9:57:11<18:21:19, 3.73it/s] 34%|███▎ | 125151/371472 [9:57:12<20:03:21, 3.41it/s] 34%|███▎ | 125152/371472 [9:57:12<19:06:00, 3.58it/s] 34%|███▎ | 125153/371472 [9:57:12<19:06:13, 3.58it/s] 34%|███▎ | 125154/371472 [9:57:12<19:12:37, 3.56it/s] 34%|███▎ | 125155/371472 [9:57:13<19:13:29, 3.56it/s] 34%|███▎ | 125156/371472 [9:57:13<19:00:31, 3.60it/s] 34%|███▎ | 125157/371472 [9:57:13<19:32:05, 3.50it/s] 34%|███▎ | 125158/371472 [9:57:13<18:47:21, 3.64it/s] 34%|███▎ | 125159/371472 [9:57:14<19:12:41, 3.56it/s] 34%|███▎ | 125160/371472 [9:57:14<18:25:30, 3.71it/s] {'loss': 3.4474, 'learning_rate': 6.970846172078691e-07, 'epoch': 5.39} + 34%|███▎ | 125160/371472 [9:57:14<18:25:30, 3.71it/s] 34%|███▎ | 125161/371472 [9:57:14<18:17:17, 3.74it/s] 34%|███▎ | 125162/371472 [9:57:14<17:54:11, 3.82it/s] 34%|███▎ | 125163/371472 [9:57:15<17:52:39, 3.83it/s] 34%|███▎ | 125164/371472 [9:57:15<18:23:42, 3.72it/s] 34%|███▎ | 125165/371472 [9:57:15<19:35:35, 3.49it/s] 34%|███▎ | 125166/371472 [9:57:16<18:45:04, 3.65it/s] 34%|███▎ | 125167/371472 [9:57:16<19:43:43, 3.47it/s] 34%|███▎ | 125168/371472 [9:57:16<19:14:01, 3.56it/s] 34%|███▎ | 125169/371472 [9:57:17<20:21:04, 3.36it/s] 34%|███▎ | 125170/371472 [9:57:17<20:00:12, 3.42it/s] 34%|███▎ | 125171/371472 [9:57:17<19:00:02, 3.60it/s] 34%|███▎ | 125172/371472 [9:57:17<18:55:52, 3.61it/s] 34%|███▎ | 125173/371472 [9:57:18<18:15:33, 3.75it/s] 34%|███▎ | 125174/371472 [9:57:18<19:19:38, 3.54it/s] 34%|███▎ | 125175/371472 [9:57:18<20:07:46, 3.40it/s] 34%|███▎ | 125176/371472 [9:57:18<19:45:27, 3.46it/s] 34%|███▎ | 125177/371472 [9:57:19<20:21:18, 3.36it/s] 34%|███▎ | 125178/371472 [9:57:19<19:14:50, 3.55it/s] 34%|███▎ | 125179/371472 [9:57:19<18:27:23, 3.71it/s] 34%|███▎ | 125180/371472 [9:57:20<19:21:08, 3.54it/s] {'loss': 3.2778, 'learning_rate': 6.970361352323903e-07, 'epoch': 5.39} + 34%|███▎ | 125180/371472 [9:57:20<19:21:08, 3.54it/s] 34%|███▎ | 125181/371472 [9:57:20<19:20:20, 3.54it/s] 34%|███▎ | 125182/371472 [9:57:20<19:05:27, 3.58it/s] 34%|███▎ | 125183/371472 [9:57:20<18:26:54, 3.71it/s] 34%|███▎ | 125184/371472 [9:57:21<17:57:38, 3.81it/s] 34%|███▎ | 125185/371472 [9:57:21<17:36:34, 3.88it/s] 34%|███▎ | 125186/371472 [9:57:21<18:18:18, 3.74it/s] 34%|███▎ | 125187/371472 [9:57:21<18:01:00, 3.80it/s] 34%|███▎ | 125188/371472 [9:57:22<19:44:26, 3.47it/s] 34%|███▎ | 125189/371472 [9:57:22<19:54:14, 3.44it/s] 34%|███▎ | 125190/371472 [9:57:22<20:35:29, 3.32it/s] 34%|███▎ | 125191/371472 [9:57:23<20:24:43, 3.35it/s] 34%|███▎ | 125192/371472 [9:57:23<20:08:11, 3.40it/s] 34%|███▎ | 125193/371472 [9:57:23<20:05:09, 3.41it/s] 34%|███▎ | 125194/371472 [9:57:24<20:11:08, 3.39it/s] 34%|███▎ | 125195/371472 [9:57:24<19:35:14, 3.49it/s] 34%|███▎ | 125196/371472 [9:57:24<20:40:32, 3.31it/s] 34%|███▎ | 125197/371472 [9:57:25<21:31:43, 3.18it/s] 34%|███▎ | 125198/371472 [9:57:25<20:34:34, 3.32it/s] 34%|███▎ | 125199/371472 [9:57:25<20:10:27, 3.39it/s] 34%|███▎ | 125200/371472 [9:57:25<21:10:45, 3.23it/s] {'loss': 3.2983, 'learning_rate': 6.969876532569113e-07, 'epoch': 5.39} + 34%|███▎ | 125200/371472 [9:57:25<21:10:45, 3.23it/s] 34%|███▎ | 125201/371472 [9:57:26<21:13:27, 3.22it/s] 34%|███▎ | 125202/371472 [9:57:26<20:20:13, 3.36it/s] 34%|███▎ | 125203/371472 [9:57:26<19:39:05, 3.48it/s] 34%|███▎ | 125204/371472 [9:57:27<19:19:54, 3.54it/s] 34%|███▎ | 125205/371472 [9:57:27<18:26:30, 3.71it/s] 34%|███▎ | 125206/371472 [9:57:27<18:27:30, 3.71it/s] 34%|███▎ | 125207/371472 [9:57:27<18:31:48, 3.69it/s] 34%|███▎ | 125208/371472 [9:57:28<18:14:26, 3.75it/s] 34%|███▎ | 125209/371472 [9:57:28<18:36:19, 3.68it/s] 34%|███▎ | 125210/371472 [9:57:28<19:35:03, 3.49it/s] 34%|███▎ | 125211/371472 [9:57:28<20:00:33, 3.42it/s] 34%|███▎ | 125212/371472 [9:57:29<21:50:37, 3.13it/s] 34%|███▎ | 125213/371472 [9:57:29<20:08:16, 3.40it/s] 34%|███▎ | 125214/371472 [9:57:29<19:18:46, 3.54it/s] 34%|███▎ | 125215/371472 [9:57:30<18:33:48, 3.68it/s] 34%|███▎ | 125216/371472 [9:57:30<18:32:40, 3.69it/s] 34%|███▎ | 125217/371472 [9:57:30<19:03:19, 3.59it/s] 34%|███▎ | 125218/371472 [9:57:30<18:52:11, 3.63it/s] 34%|███▎ | 125219/371472 [9:57:31<20:02:02, 3.41it/s] 34%|███▎ | 125220/371472 [9:57:31<19:03:03, 3.59it/s] {'loss': 3.2102, 'learning_rate': 6.969391712814324e-07, 'epoch': 5.39} + 34%|███▎ | 125220/371472 [9:57:31<19:03:03, 3.59it/s] 34%|███▎ | 125221/371472 [9:57:31<19:03:20, 3.59it/s] 34%|███▎ | 125222/371472 [9:57:32<18:48:17, 3.64it/s] 34%|███▎ | 125223/371472 [9:57:32<20:37:22, 3.32it/s] 34%|███▎ | 125224/371472 [9:57:32<19:29:03, 3.51it/s] 34%|███▎ | 125225/371472 [9:57:32<19:45:55, 3.46it/s] 34%|███▎ | 125226/371472 [9:57:33<18:36:53, 3.67it/s] 34%|███▎ | 125227/371472 [9:57:33<19:08:52, 3.57it/s] 34%|███▎ | 125228/371472 [9:57:33<20:03:36, 3.41it/s] 34%|███▎ | 125229/371472 [9:57:34<19:43:47, 3.47it/s] 34%|███▎ | 125230/371472 [9:57:34<18:50:48, 3.63it/s] 34%|███▎ | 125231/371472 [9:57:34<19:24:43, 3.52it/s] 34%|███▎ | 125232/371472 [9:57:34<18:29:11, 3.70it/s] 34%|███▎ | 125233/371472 [9:57:35<18:51:39, 3.63it/s] 34%|███▎ | 125234/371472 [9:57:35<19:26:44, 3.52it/s] 34%|███▎ | 125235/371472 [9:57:35<19:44:28, 3.46it/s] 34%|███▎ | 125236/371472 [9:57:36<18:42:19, 3.66it/s] 34%|███▎ | 125237/371472 [9:57:36<18:34:17, 3.68it/s] 34%|███▎ | 125238/371472 [9:57:36<19:27:05, 3.52it/s] 34%|███▎ | 125239/371472 [9:57:36<19:07:50, 3.58it/s] 34%|███▎ | 125240/371472 [9:57:37<19:15:25, 3.55it/s] {'loss': 3.295, 'learning_rate': 6.968906893059535e-07, 'epoch': 5.39} + 34%|███▎ | 125240/371472 [9:57:37<19:15:25, 3.55it/s] 34%|███▎ | 125241/371472 [9:57:37<18:42:55, 3.65it/s] 34%|███▎ | 125242/371472 [9:57:37<18:07:04, 3.78it/s] 34%|███▎ | 125243/371472 [9:57:37<18:39:21, 3.67it/s] 34%|███▎ | 125244/371472 [9:57:38<18:13:29, 3.75it/s] 34%|███▎ | 125245/371472 [9:57:38<18:25:37, 3.71it/s] 34%|███▎ | 125246/371472 [9:57:38<18:14:38, 3.75it/s] 34%|███▎ | 125247/371472 [9:57:39<18:31:27, 3.69it/s] 34%|███▎ | 125248/371472 [9:57:39<18:01:39, 3.79it/s] 34%|███▎ | 125249/371472 [9:57:39<19:42:11, 3.47it/s] 34%|███▎ | 125250/371472 [9:57:39<19:14:25, 3.55it/s] 34%|███▎ | 125251/371472 [9:57:40<18:49:33, 3.63it/s] 34%|███▎ | 125252/371472 [9:57:40<18:08:05, 3.77it/s] 34%|███▎ | 125253/371472 [9:57:40<18:04:35, 3.78it/s] 34%|███▎ | 125254/371472 [9:57:40<18:18:28, 3.74it/s] 34%|███▎ | 125255/371472 [9:57:41<18:13:17, 3.75it/s] 34%|███▎ | 125256/371472 [9:57:41<18:15:02, 3.75it/s] 34%|███▎ | 125257/371472 [9:57:41<17:59:24, 3.80it/s] 34%|███▎ | 125258/371472 [9:57:41<18:03:35, 3.79it/s] 34%|███▎ | 125259/371472 [9:57:42<17:54:17, 3.82it/s] 34%|███▎ | 125260/371472 [9:57:42<18:05:42, 3.78it/s] {'loss': 3.2226, 'learning_rate': 6.968422073304747e-07, 'epoch': 5.4} + 34%|███▎ | 125260/371472 [9:57:42<18:05:42, 3.78it/s] 34%|███▎ | 125261/371472 [9:57:42<17:51:47, 3.83it/s] 34%|███▎ | 125262/371472 [9:57:43<19:44:51, 3.46it/s] 34%|███▎ | 125263/371472 [9:57:43<19:19:47, 3.54it/s] 34%|███▎ | 125264/371472 [9:57:43<19:00:31, 3.60it/s] 34%|███▎ | 125265/371472 [9:57:43<18:17:42, 3.74it/s] 34%|███▎ | 125266/371472 [9:57:44<17:48:46, 3.84it/s] 34%|███▎ | 125267/371472 [9:57:44<17:38:53, 3.88it/s] 34%|███▎ | 125268/371472 [9:57:44<17:45:51, 3.85it/s] 34%|███▎ | 125269/371472 [9:57:44<18:32:53, 3.69it/s] 34%|███▎ | 125270/371472 [9:57:45<18:29:10, 3.70it/s] 34%|███▎ | 125271/371472 [9:57:45<17:57:47, 3.81it/s] 34%|███▎ | 125272/371472 [9:57:45<18:39:57, 3.66it/s] 34%|███▎ | 125273/371472 [9:57:46<18:18:51, 3.73it/s] 34%|███▎ | 125274/371472 [9:57:46<18:25:48, 3.71it/s] 34%|███▎ | 125275/371472 [9:57:46<18:36:41, 3.67it/s] 34%|███▎ | 125276/371472 [9:57:46<18:46:12, 3.64it/s] 34%|███▎ | 125277/371472 [9:57:47<18:41:17, 3.66it/s] 34%|███▎ | 125278/371472 [9:57:47<18:21:17, 3.73it/s] 34%|███▎ | 125279/371472 [9:57:47<17:40:12, 3.87it/s] 34%|███▎ | 125280/371472 [9:57:47<18:42:25, 3.66it/s] {'loss': 3.218, 'learning_rate': 6.967937253549958e-07, 'epoch': 5.4} + 34%|███▎ | 125280/371472 [9:57:47<18:42:25, 3.66it/s] 34%|███▎ | 125281/371472 [9:57:48<19:00:15, 3.60it/s] 34%|███▎ | 125282/371472 [9:57:48<20:04:56, 3.41it/s] 34%|███▎ | 125283/371472 [9:57:48<21:33:15, 3.17it/s] 34%|███▎ | 125284/371472 [9:57:49<21:31:06, 3.18it/s] 34%|███▎ | 125285/371472 [9:57:49<21:16:54, 3.21it/s] 34%|███▎ | 125286/371472 [9:57:49<20:41:43, 3.30it/s] 34%|███▎ | 125287/371472 [9:57:50<20:20:29, 3.36it/s] 34%|███▎ | 125288/371472 [9:57:50<20:39:04, 3.31it/s] 34%|███▎ | 125289/371472 [9:57:50<22:26:59, 3.05it/s] 34%|███▎ | 125290/371472 [9:57:51<20:55:48, 3.27it/s] 34%|███▎ | 125291/371472 [9:57:51<19:45:58, 3.46it/s] 34%|███▎ | 125292/371472 [9:57:51<19:28:12, 3.51it/s] 34%|███▎ | 125293/371472 [9:57:51<19:03:18, 3.59it/s] 34%|███▎ | 125294/371472 [9:57:52<19:12:11, 3.56it/s] 34%|███▎ | 125295/371472 [9:57:52<18:41:29, 3.66it/s] 34%|███▎ | 125296/371472 [9:57:52<18:04:02, 3.78it/s] 34%|███▎ | 125297/371472 [9:57:52<18:03:42, 3.79it/s] 34%|███▎ | 125298/371472 [9:57:53<18:09:37, 3.77it/s] 34%|███▎ | 125299/371472 [9:57:53<17:58:22, 3.80it/s] 34%|███▎ | 125300/371472 [9:57:53<18:11:16, 3.76it/s] {'loss': 3.042, 'learning_rate': 6.967452433795169e-07, 'epoch': 5.4} + 34%|███▎ | 125300/371472 [9:57:53<18:11:16, 3.76it/s] 34%|███▎ | 125301/371472 [9:57:53<18:12:48, 3.75it/s] 34%|███▎ | 125302/371472 [9:57:54<17:50:13, 3.83it/s] 34%|███▎ | 125303/371472 [9:57:54<18:16:07, 3.74it/s] 34%|███▎ | 125304/371472 [9:57:54<18:59:26, 3.60it/s] 34%|███▎ | 125305/371472 [9:57:55<20:04:20, 3.41it/s] 34%|███▎ | 125306/371472 [9:57:55<20:50:33, 3.28it/s] 34%|███▎ | 125307/371472 [9:57:55<19:54:24, 3.43it/s] 34%|███▎ | 125308/371472 [9:57:55<20:03:02, 3.41it/s] 34%|███▎ | 125309/371472 [9:57:56<19:04:47, 3.58it/s] 34%|███▎ | 125310/371472 [9:57:56<18:21:27, 3.72it/s] 34%|███▎ | 125311/371472 [9:57:56<18:08:14, 3.77it/s] 34%|███▎ | 125312/371472 [9:57:56<17:54:54, 3.82it/s] 34%|███▎ | 125313/371472 [9:57:57<18:07:45, 3.77it/s] 34%|███▎ | 125314/371472 [9:57:57<18:00:00, 3.80it/s] 34%|███▎ | 125315/371472 [9:57:57<17:45:52, 3.85it/s] 34%|███▎ | 125316/371472 [9:57:58<17:47:01, 3.84it/s] 34%|███▎ | 125317/371472 [9:57:58<18:54:21, 3.62it/s] 34%|███▎ | 125318/371472 [9:57:58<19:41:19, 3.47it/s] 34%|███▎ | 125319/371472 [9:57:59<21:00:46, 3.25it/s] 34%|███▎ | 125320/371472 [9:57:59<20:16:12, 3.37it/s] {'loss': 3.1354, 'learning_rate': 6.96696761404038e-07, 'epoch': 5.4} + 34%|███▎ | 125320/371472 [9:57:59<20:16:12, 3.37it/s] 34%|███▎ | 125321/371472 [9:57:59<19:58:35, 3.42it/s] 34%|███▎ | 125322/371472 [9:57:59<21:27:51, 3.19it/s] 34%|███▎ | 125323/371472 [9:58:00<19:55:05, 3.43it/s] 34%|███▎ | 125324/371472 [9:58:00<20:02:48, 3.41it/s] 34%|███▎ | 125325/371472 [9:58:00<20:40:59, 3.31it/s] 34%|███▎ | 125326/371472 [9:58:01<20:42:59, 3.30it/s] 34%|███▎ | 125327/371472 [9:58:01<21:03:43, 3.25it/s] 34%|███▎ | 125328/371472 [9:58:01<20:23:49, 3.35it/s] 34%|███▎ | 125329/371472 [9:58:01<20:19:46, 3.36it/s] 34%|███▎ | 125330/371472 [9:58:02<20:09:32, 3.39it/s] 34%|███▎ | 125331/371472 [9:58:02<19:32:27, 3.50it/s] 34%|███▎ | 125332/371472 [9:58:02<19:55:30, 3.43it/s] 34%|███▎ | 125333/371472 [9:58:03<20:27:58, 3.34it/s] 34%|███▎ | 125334/371472 [9:58:03<20:09:46, 3.39it/s] 34%|███▎ | 125335/371472 [9:58:03<19:59:41, 3.42it/s] 34%|███▎ | 125336/371472 [9:58:04<19:41:59, 3.47it/s] 34%|███▎ | 125337/371472 [9:58:04<19:16:05, 3.55it/s] 34%|███▎ | 125338/371472 [9:58:04<20:40:29, 3.31it/s] 34%|███▎ | 125339/371472 [9:58:04<20:34:08, 3.32it/s] 34%|███▎ | 125340/371472 [9:58:05<21:04:33, 3.24it/s] {'loss': 3.3227, 'learning_rate': 6.966482794285591e-07, 'epoch': 5.4} + 34%|███▎ | 125340/371472 [9:58:05<21:04:33, 3.24it/s] 34%|███▎ | 125341/371472 [9:58:05<20:03:03, 3.41it/s] 34%|███▎ | 125342/371472 [9:58:05<19:53:54, 3.44it/s] 34%|███▎ | 125343/371472 [9:58:06<19:50:35, 3.45it/s] 34%|███▎ | 125344/371472 [9:58:06<19:46:50, 3.46it/s] 34%|███▎ | 125345/371472 [9:58:06<19:06:27, 3.58it/s] 34%|███▎ | 125346/371472 [9:58:06<20:25:53, 3.35it/s] 34%|███▎ | 125347/371472 [9:58:07<20:09:09, 3.39it/s] 34%|███▎ | 125348/371472 [9:58:07<19:46:53, 3.46it/s] 34%|███▎ | 125349/371472 [9:58:07<19:33:31, 3.50it/s] 34%|███▎ | 125350/371472 [9:58:08<19:35:00, 3.49it/s] 34%|███▎ | 125351/371472 [9:58:08<19:15:38, 3.55it/s] 34%|███▎ | 125352/371472 [9:58:08<19:33:11, 3.50it/s] 34%|███▎ | 125353/371472 [9:58:08<19:18:07, 3.54it/s] 34%|███▎ | 125354/371472 [9:58:09<18:29:13, 3.70it/s] 34%|███▎ | 125355/371472 [9:58:09<17:48:41, 3.84it/s] 34%|███▎ | 125356/371472 [9:58:09<17:40:36, 3.87it/s] 34%|███▎ | 125357/371472 [9:58:10<19:16:39, 3.55it/s] 34%|███▎ | 125358/371472 [9:58:10<19:21:43, 3.53it/s] 34%|███▎ | 125359/371472 [9:58:10<19:12:11, 3.56it/s] 34%|███▎ | 125360/371472 [9:58:10<19:09:41, 3.57it/s] {'loss': 3.2836, 'learning_rate': 6.965997974530802e-07, 'epoch': 5.4} + 34%|███▎ | 125360/371472 [9:58:10<19:09:41, 3.57it/s] 34%|███▎ | 125361/371472 [9:58:11<18:47:49, 3.64it/s] 34%|███▎ | 125362/371472 [9:58:11<18:22:27, 3.72it/s] 34%|███▎ | 125363/371472 [9:58:11<18:28:07, 3.70it/s] 34%|███▎ | 125364/371472 [9:58:11<18:52:04, 3.62it/s] 34%|███▎ | 125365/371472 [9:58:12<18:56:20, 3.61it/s] 34%|███▎ | 125366/371472 [9:58:12<18:46:28, 3.64it/s] 34%|███▎ | 125367/371472 [9:58:12<18:25:10, 3.71it/s] 34%|███▎ | 125368/371472 [9:58:12<17:55:07, 3.82it/s] 34%|███▎ | 125369/371472 [9:58:13<17:55:07, 3.82it/s] 34%|███▎ | 125370/371472 [9:58:13<17:42:29, 3.86it/s] 34%|███▎ | 125371/371472 [9:58:13<18:33:02, 3.69it/s] 34%|███▍ | 125372/371472 [9:58:14<17:54:04, 3.82it/s] 34%|███▍ | 125373/371472 [9:58:14<18:54:28, 3.62it/s] 34%|███▍ | 125374/371472 [9:58:14<18:10:07, 3.76it/s] 34%|███▍ | 125375/371472 [9:58:14<17:45:10, 3.85it/s] 34%|███▍ | 125376/371472 [9:58:15<19:06:40, 3.58it/s] 34%|███▍ | 125377/371472 [9:58:15<18:38:09, 3.67it/s] 34%|███▍ | 125378/371472 [9:58:15<18:13:58, 3.75it/s] 34%|███▍ | 125379/371472 [9:58:15<18:10:36, 3.76it/s] 34%|███▍ | 125380/371472 [9:58:16<18:23:04, 3.72it/s] {'loss': 3.0382, 'learning_rate': 6.965513154776013e-07, 'epoch': 5.4} + 34%|███▍ | 125380/371472 [9:58:16<18:23:04, 3.72it/s] 34%|███▍ | 125381/371472 [9:58:16<18:06:57, 3.77it/s] 34%|███▍ | 125382/371472 [9:58:16<18:00:05, 3.80it/s] 34%|███▍ | 125383/371472 [9:58:16<17:47:02, 3.84it/s] 34%|███▍ | 125384/371472 [9:58:17<18:18:01, 3.74it/s] 34%|███▍ | 125385/371472 [9:58:17<20:23:07, 3.35it/s] 34%|███▍ | 125386/371472 [9:58:17<19:03:53, 3.59it/s] 34%|███▍ | 125387/371472 [9:58:18<18:51:29, 3.62it/s] 34%|███▍ | 125388/371472 [9:58:18<19:29:38, 3.51it/s] 34%|███▍ | 125389/371472 [9:58:18<19:31:00, 3.50it/s] 34%|███▍ | 125390/371472 [9:58:18<19:06:17, 3.58it/s] 34%|███▍ | 125391/371472 [9:58:19<18:54:22, 3.62it/s] 34%|███▍ | 125392/371472 [9:58:19<18:33:57, 3.68it/s] 34%|███▍ | 125393/371472 [9:58:19<17:54:56, 3.82it/s] 34%|███▍ | 125394/371472 [9:58:20<17:35:02, 3.89it/s] 34%|███▍ | 125395/371472 [9:58:20<17:57:34, 3.81it/s] 34%|███▍ | 125396/371472 [9:58:20<18:08:06, 3.77it/s] 34%|███▍ | 125397/371472 [9:58:20<17:57:23, 3.81it/s] 34%|███▍ | 125398/371472 [9:58:21<19:23:51, 3.52it/s] 34%|███▍ | 125399/371472 [9:58:21<18:18:37, 3.73it/s] 34%|███▍ | 125400/371472 [9:58:21<18:59:51, 3.60it/s] {'loss': 3.2907, 'learning_rate': 6.965028335021224e-07, 'epoch': 5.4} + 34%|███▍ | 125400/371472 [9:58:21<18:59:51, 3.60it/s] 34%|███▍ | 125401/371472 [9:58:21<18:13:46, 3.75it/s] 34%|███▍ | 125402/371472 [9:58:22<18:22:16, 3.72it/s] 34%|███▍ | 125403/371472 [9:58:22<18:21:55, 3.72it/s] 34%|███▍ | 125404/371472 [9:58:22<17:49:37, 3.83it/s] 34%|███▍ | 125405/371472 [9:58:22<18:18:20, 3.73it/s] 34%|███▍ | 125406/371472 [9:58:23<19:30:09, 3.50it/s] 34%|███▍ | 125407/371472 [9:58:23<21:38:39, 3.16it/s] 34%|███▍ | 125408/371472 [9:58:23<19:59:41, 3.42it/s] 34%|███▍ | 125409/371472 [9:58:24<21:00:18, 3.25it/s] 34%|███▍ | 125410/371472 [9:58:24<19:59:24, 3.42it/s] 34%|███▍ | 125411/371472 [9:58:24<19:11:17, 3.56it/s] 34%|███▍ | 125412/371472 [9:58:25<18:42:11, 3.65it/s] 34%|███▍ | 125413/371472 [9:58:25<18:19:45, 3.73it/s] 34%|███▍ | 125414/371472 [9:58:25<18:15:41, 3.74it/s] 34%|███▍ | 125415/371472 [9:58:25<18:27:35, 3.70it/s] 34%|███▍ | 125416/371472 [9:58:26<19:43:41, 3.46it/s] 34%|███▍ | 125417/371472 [9:58:26<18:51:21, 3.62it/s] 34%|███▍ | 125418/371472 [9:58:26<18:25:59, 3.71it/s] 34%|███▍ | 125419/371472 [9:58:26<19:22:35, 3.53it/s] 34%|███▍ | 125420/371472 [9:58:27<19:50:14, 3.45it/s] {'loss': 3.4009, 'learning_rate': 6.964543515266436e-07, 'epoch': 5.4} + 34%|███▍ | 125420/371472 [9:58:27<19:50:14, 3.45it/s] 34%|███▍ | 125421/371472 [9:58:27<18:57:10, 3.61it/s] 34%|███▍ | 125422/371472 [9:58:27<21:37:52, 3.16it/s] 34%|███▍ | 125423/371472 [9:58:28<21:20:45, 3.20it/s] 34%|███▍ | 125424/371472 [9:58:28<21:24:59, 3.19it/s] 34%|███▍ | 125425/371472 [9:58:28<21:39:32, 3.16it/s] 34%|███▍ | 125426/371472 [9:58:29<21:46:11, 3.14it/s] 34%|███▍ | 125427/371472 [9:58:29<20:40:35, 3.31it/s] 34%|███▍ | 125428/371472 [9:58:29<22:14:24, 3.07it/s] 34%|███▍ | 125429/371472 [9:58:30<21:14:32, 3.22it/s] 34%|███▍ | 125430/371472 [9:58:30<19:50:27, 3.44it/s] 34%|███▍ | 125431/371472 [9:58:30<19:37:30, 3.48it/s] 34%|███▍ | 125432/371472 [9:58:30<20:02:39, 3.41it/s] 34%|███▍ | 125433/371472 [9:58:31<19:21:18, 3.53it/s] 34%|███▍ | 125434/371472 [9:58:31<18:31:48, 3.69it/s] 34%|███▍ | 125435/371472 [9:58:31<17:52:31, 3.82it/s] 34%|███▍ | 125436/371472 [9:58:31<17:32:00, 3.90it/s] 34%|███▍ | 125437/371472 [9:58:32<19:10:16, 3.56it/s] 34%|███▍ | 125438/371472 [9:58:32<18:51:54, 3.62it/s] 34%|███▍ | 125439/371472 [9:58:32<18:29:57, 3.69it/s] 34%|███▍ | 125440/371472 [9:58:33<19:33:05, 3.50it/s] {'loss': 3.2983, 'learning_rate': 6.964058695511647e-07, 'epoch': 5.4} + 34%|███▍ | 125440/371472 [9:58:33<19:33:05, 3.50it/s] 34%|███▍ | 125441/371472 [9:58:33<19:52:01, 3.44it/s] 34%|███▍ | 125442/371472 [9:58:33<20:14:35, 3.38it/s] 34%|███▍ | 125443/371472 [9:58:34<19:39:32, 3.48it/s] 34%|███▍ | 125444/371472 [9:58:34<18:53:53, 3.62it/s] 34%|███▍ | 125445/371472 [9:58:34<18:08:04, 3.77it/s] 34%|███▍ | 125446/371472 [9:58:34<18:17:19, 3.74it/s] 34%|███▍ | 125447/371472 [9:58:35<19:02:54, 3.59it/s] 34%|███▍ | 125448/371472 [9:58:35<21:12:28, 3.22it/s] 34%|███▍ | 125449/371472 [9:58:35<21:33:01, 3.17it/s] 34%|███▍ | 125450/371472 [9:58:36<20:51:10, 3.28it/s] 34%|███▍ | 125451/371472 [9:58:36<20:06:12, 3.40it/s] 34%|███▍ | 125452/371472 [9:58:36<19:28:45, 3.51it/s] 34%|███▍ | 125453/371472 [9:58:36<19:19:04, 3.54it/s] 34%|███▍ | 125454/371472 [9:58:37<19:41:14, 3.47it/s] 34%|███▍ | 125455/371472 [9:58:37<18:33:30, 3.68it/s] 34%|███▍ | 125456/371472 [9:58:37<19:42:51, 3.47it/s] 34%|███▍ | 125457/371472 [9:58:38<19:31:12, 3.50it/s] 34%|███▍ | 125458/371472 [9:58:38<23:49:04, 2.87it/s] 34%|███▍ | 125459/371472 [9:58:38<22:16:03, 3.07it/s] 34%|███▍ | 125460/371472 [9:58:39<21:14:44, 3.22it/s] {'loss': 3.2915, 'learning_rate': 6.963573875756856e-07, 'epoch': 5.4} + 34%|███▍ | 125460/371472 [9:58:39<21:14:44, 3.22it/s] 34%|███▍ | 125461/371472 [9:58:39<20:47:02, 3.29it/s] 34%|███▍ | 125462/371472 [9:58:39<21:10:33, 3.23it/s] 34%|███▍ | 125463/371472 [9:58:39<20:21:18, 3.36it/s] 34%|███▍ | 125464/371472 [9:58:40<20:08:53, 3.39it/s] 34%|███▍ | 125465/371472 [9:58:40<19:28:00, 3.51it/s] 34%|███▍ | 125466/371472 [9:58:40<19:34:22, 3.49it/s] 34%|███▍ | 125467/371472 [9:58:41<20:48:57, 3.28it/s] 34%|███▍ | 125468/371472 [9:58:41<20:22:30, 3.35it/s] 34%|███▍ | 125469/371472 [9:58:41<19:32:52, 3.50it/s] 34%|███▍ | 125470/371472 [9:58:42<21:09:27, 3.23it/s] 34%|███▍ | 125471/371472 [9:58:42<20:53:24, 3.27it/s] 34%|███▍ | 125472/371472 [9:58:42<20:01:42, 3.41it/s] 34%|███▍ | 125473/371472 [9:58:42<19:25:29, 3.52it/s] 34%|███▍ | 125474/371472 [9:58:43<18:58:28, 3.60it/s] 34%|███▍ | 125475/371472 [9:58:43<18:29:15, 3.70it/s] 34%|███▍ | 125476/371472 [9:58:43<18:12:09, 3.75it/s] 34%|███▍ | 125477/371472 [9:58:43<18:08:19, 3.77it/s] 34%|███▍ | 125478/371472 [9:58:44<17:57:17, 3.81it/s] 34%|███▍ | 125479/371472 [9:58:44<19:30:41, 3.50it/s] 34%|███▍ | 125480/371472 [9:58:44<18:45:17, 3.64it/s] {'loss': 3.2158, 'learning_rate': 6.963089056002068e-07, 'epoch': 5.4} + 34%|███▍ | 125480/371472 [9:58:44<18:45:17, 3.64it/s] 34%|███▍ | 125481/371472 [9:58:45<18:42:41, 3.65it/s] 34%|███▍ | 125482/371472 [9:58:45<18:51:28, 3.62it/s] 34%|███▍ | 125483/371472 [9:58:45<20:56:08, 3.26it/s] 34%|███▍ | 125484/371472 [9:58:45<20:01:09, 3.41it/s] 34%|███▍ | 125485/371472 [9:58:46<19:51:41, 3.44it/s] 34%|███▍ | 125486/371472 [9:58:46<18:37:07, 3.67it/s] 34%|███▍ | 125487/371472 [9:58:46<19:06:56, 3.57it/s] 34%|███▍ | 125488/371472 [9:58:46<18:15:21, 3.74it/s] 34%|███▍ | 125489/371472 [9:58:47<21:17:07, 3.21it/s] 34%|███▍ | 125490/371472 [9:58:47<20:23:11, 3.35it/s] 34%|███▍ | 125491/371472 [9:58:47<20:53:36, 3.27it/s] 34%|███▍ | 125492/371472 [9:58:48<20:57:26, 3.26it/s] 34%|███▍ | 125493/371472 [9:58:48<20:49:22, 3.28it/s] 34%|███▍ | 125494/371472 [9:58:48<20:48:13, 3.28it/s] 34%|███▍ | 125495/371472 [9:58:49<19:43:03, 3.47it/s] 34%|███▍ | 125496/371472 [9:58:49<19:37:53, 3.48it/s] 34%|███▍ | 125497/371472 [9:58:49<19:07:22, 3.57it/s] 34%|███▍ | 125498/371472 [9:58:49<18:50:42, 3.63it/s] 34%|███▍ | 125499/371472 [9:58:50<19:33:44, 3.49it/s] 34%|███▍ | 125500/371472 [9:58:50<18:51:53, 3.62it/s] {'loss': 3.1302, 'learning_rate': 6.962604236247279e-07, 'epoch': 5.41} + 34%|███▍ | 125500/371472 [9:58:50<18:51:53, 3.62it/s] 34%|███▍ | 125501/371472 [9:58:50<19:23:07, 3.52it/s] 34%|███▍ | 125502/371472 [9:58:51<19:08:00, 3.57it/s] 34%|███▍ | 125503/371472 [9:58:51<18:38:36, 3.66it/s] 34%|███▍ | 125504/371472 [9:58:51<19:13:50, 3.55it/s] 34%|███▍ | 125505/371472 [9:58:51<18:52:53, 3.62it/s] 34%|███▍ | 125506/371472 [9:58:52<18:53:58, 3.62it/s] 34%|███▍ | 125507/371472 [9:58:52<18:37:23, 3.67it/s] 34%|███▍ | 125508/371472 [9:58:52<20:12:50, 3.38it/s] 34%|███▍ | 125509/371472 [9:58:53<19:36:46, 3.48it/s] 34%|███▍ | 125510/371472 [9:58:53<19:34:19, 3.49it/s] 34%|███▍ | 125511/371472 [9:58:53<20:03:30, 3.41it/s] 34%|███▍ | 125512/371472 [9:58:53<20:08:37, 3.39it/s] 34%|███▍ | 125513/371472 [9:58:54<19:08:54, 3.57it/s] 34%|███▍ | 125514/371472 [9:58:54<19:38:11, 3.48it/s] 34%|███▍ | 125515/371472 [9:58:54<21:04:55, 3.24it/s] 34%|███▍ | 125516/371472 [9:58:55<20:09:15, 3.39it/s] 34%|███▍ | 125517/371472 [9:58:55<19:42:02, 3.47it/s] 34%|███▍ | 125518/371472 [9:58:55<20:26:28, 3.34it/s] 34%|███▍ | 125519/371472 [9:58:56<19:36:40, 3.48it/s] 34%|███▍ | 125520/371472 [9:58:56<19:06:06, 3.58it/s] {'loss': 3.1503, 'learning_rate': 6.96211941649249e-07, 'epoch': 5.41} + 34%|███▍ | 125520/371472 [9:58:56<19:06:06, 3.58it/s] 34%|███▍ | 125521/371472 [9:58:56<19:54:09, 3.43it/s] 34%|███▍ | 125522/371472 [9:58:56<20:50:41, 3.28it/s] 34%|███▍ | 125523/371472 [9:58:57<20:01:38, 3.41it/s] 34%|███▍ | 125524/371472 [9:58:57<19:55:08, 3.43it/s] 34%|███▍ | 125525/371472 [9:58:57<20:28:22, 3.34it/s] 34%|███▍ | 125526/371472 [9:58:58<19:56:10, 3.43it/s] 34%|███▍ | 125527/371472 [9:58:58<19:20:07, 3.53it/s] 34%|███▍ | 125528/371472 [9:58:58<19:00:55, 3.59it/s] 34%|███▍ | 125529/371472 [9:58:58<19:50:51, 3.44it/s] 34%|███▍ | 125530/371472 [9:58:59<19:30:50, 3.50it/s] 34%|███▍ | 125531/371472 [9:58:59<19:47:13, 3.45it/s] 34%|███▍ | 125532/371472 [9:58:59<19:59:14, 3.42it/s] 34%|███▍ | 125533/371472 [9:59:00<20:13:43, 3.38it/s] 34%|███▍ | 125534/371472 [9:59:00<20:01:12, 3.41it/s] 34%|███▍ | 125535/371472 [9:59:00<20:05:44, 3.40it/s] 34%|███▍ | 125536/371472 [9:59:00<19:46:43, 3.45it/s] 34%|███▍ | 125537/371472 [9:59:01<21:32:31, 3.17it/s] 34%|███▍ | 125538/371472 [9:59:01<20:06:39, 3.40it/s] 34%|███▍ | 125539/371472 [9:59:01<19:46:18, 3.46it/s] 34%|███▍ | 125540/371472 [9:59:02<18:56:37, 3.61it/s] {'loss': 3.1419, 'learning_rate': 6.961634596737701e-07, 'epoch': 5.41} + 34%|███▍ | 125540/371472 [9:59:02<18:56:37, 3.61it/s] 34%|███▍ | 125541/371472 [9:59:02<19:17:50, 3.54it/s] 34%|███▍ | 125542/371472 [9:59:02<19:22:36, 3.53it/s] 34%|███▍ | 125543/371472 [9:59:02<18:37:32, 3.67it/s] 34%|███▍ | 125544/371472 [9:59:03<18:56:05, 3.61it/s] 34%|███▍ | 125545/371472 [9:59:03<18:44:22, 3.65it/s] 34%|███▍ | 125546/371472 [9:59:03<19:03:57, 3.58it/s] 34%|███▍ | 125547/371472 [9:59:04<18:58:49, 3.60it/s] 34%|███▍ | 125548/371472 [9:59:04<19:08:10, 3.57it/s] 34%|███▍ | 125549/371472 [9:59:04<19:54:02, 3.43it/s] 34%|███▍ | 125550/371472 [9:59:04<18:47:36, 3.63it/s] 34%|███▍ | 125551/371472 [9:59:05<18:14:36, 3.74it/s] 34%|███▍ | 125552/371472 [9:59:05<17:57:22, 3.80it/s] 34%|███▍ | 125553/371472 [9:59:05<18:04:41, 3.78it/s] 34%|███▍ | 125554/371472 [9:59:05<17:31:19, 3.90it/s] 34%|███▍ | 125555/371472 [9:59:06<18:33:17, 3.68it/s] 34%|███▍ | 125556/371472 [9:59:06<26:58:39, 2.53it/s] 34%|███▍ | 125557/371472 [9:59:07<24:32:27, 2.78it/s] 34%|███▍ | 125558/371472 [9:59:07<23:27:17, 2.91it/s] 34%|███▍ | 125559/371472 [9:59:07<21:24:19, 3.19it/s] 34%|███▍ | 125560/371472 [9:59:08<21:46:51, 3.14it/s] {'loss': 3.381, 'learning_rate': 6.961149776982913e-07, 'epoch': 5.41} + 34%|███▍ | 125560/371472 [9:59:08<21:46:51, 3.14it/s] 34%|███▍ | 125561/371472 [9:59:08<21:09:27, 3.23it/s] 34%|███▍ | 125562/371472 [9:59:08<19:57:48, 3.42it/s] 34%|███▍ | 125563/371472 [9:59:08<20:30:45, 3.33it/s] 34%|███▍ | 125564/371472 [9:59:09<19:34:09, 3.49it/s] 34%|███▍ | 125565/371472 [9:59:09<19:35:45, 3.49it/s] 34%|███▍ | 125566/371472 [9:59:09<18:44:27, 3.64it/s] 34%|███▍ | 125567/371472 [9:59:09<18:45:56, 3.64it/s] 34%|███▍ | 125568/371472 [9:59:10<19:19:47, 3.53it/s] 34%|███▍ | 125569/371472 [9:59:10<18:35:10, 3.68it/s] 34%|███▍ | 125570/371472 [9:59:10<18:02:19, 3.79it/s] 34%|███▍ | 125571/371472 [9:59:11<17:48:13, 3.84it/s] 34%|███▍ | 125572/371472 [9:59:11<17:37:29, 3.88it/s] 34%|███▍ | 125573/371472 [9:59:11<18:02:21, 3.79it/s] 34%|███▍ | 125574/371472 [9:59:11<18:26:18, 3.70it/s] 34%|███▍ | 125575/371472 [9:59:12<19:01:34, 3.59it/s] 34%|███▍ | 125576/371472 [9:59:12<18:30:54, 3.69it/s] 34%|███▍ | 125577/371472 [9:59:12<18:07:44, 3.77it/s] 34%|███▍ | 125578/371472 [9:59:12<17:25:22, 3.92it/s] 34%|███▍ | 125579/371472 [9:59:13<17:48:00, 3.84it/s] 34%|███▍ | 125580/371472 [9:59:13<18:35:48, 3.67it/s] {'loss': 3.159, 'learning_rate': 6.960664957228124e-07, 'epoch': 5.41} + 34%|███▍ | 125580/371472 [9:59:13<18:35:48, 3.67it/s] 34%|███▍ | 125581/371472 [9:59:13<19:10:10, 3.56it/s] 34%|███▍ | 125582/371472 [9:59:14<19:40:37, 3.47it/s] 34%|██���▍ | 125583/371472 [9:59:14<19:04:29, 3.58it/s] 34%|███▍ | 125584/371472 [9:59:14<21:07:54, 3.23it/s] 34%|███▍ | 125585/371472 [9:59:14<21:03:41, 3.24it/s] 34%|███▍ | 125586/371472 [9:59:15<20:22:17, 3.35it/s] 34%|███▍ | 125587/371472 [9:59:15<19:28:36, 3.51it/s] 34%|███▍ | 125588/371472 [9:59:15<19:38:46, 3.48it/s] 34%|███▍ | 125589/371472 [9:59:16<19:59:31, 3.42it/s] 34%|███▍ | 125590/371472 [9:59:16<19:16:50, 3.54it/s] 34%|███▍ | 125591/371472 [9:59:16<20:47:40, 3.28it/s] 34%|███▍ | 125592/371472 [9:59:17<19:58:14, 3.42it/s] 34%|███▍ | 125593/371472 [9:59:17<20:36:25, 3.31it/s] 34%|███▍ | 125594/371472 [9:59:17<19:45:47, 3.46it/s] 34%|███▍ | 125595/371472 [9:59:17<19:24:25, 3.52it/s] 34%|███▍ | 125596/371472 [9:59:18<18:53:52, 3.61it/s] 34%|███▍ | 125597/371472 [9:59:18<19:05:12, 3.58it/s] 34%|███▍ | 125598/371472 [9:59:18<19:26:40, 3.51it/s] 34%|███▍ | 125599/371472 [9:59:19<19:51:48, 3.44it/s] 34%|███▍ | 125600/371472 [9:59:19<19:47:47, 3.45it/s] {'loss': 3.2026, 'learning_rate': 6.960180137473334e-07, 'epoch': 5.41} + 34%|███▍ | 125600/371472 [9:59:19<19:47:47, 3.45it/s] 34%|███▍ | 125601/371472 [9:59:19<18:48:19, 3.63it/s] 34%|███▍ | 125602/371472 [9:59:19<18:35:48, 3.67it/s] 34%|███▍ | 125603/371472 [9:59:20<18:27:53, 3.70it/s] 34%|███▍ | 125604/371472 [9:59:20<18:24:55, 3.71it/s] 34%|███▍ | 125605/371472 [9:59:20<18:51:48, 3.62it/s] 34%|███▍ | 125606/371472 [9:59:20<18:29:11, 3.69it/s] 34%|███▍ | 125607/371472 [9:59:21<18:35:16, 3.67it/s] 34%|███▍ | 125608/371472 [9:59:21<18:38:24, 3.66it/s] 34%|███▍ | 125609/371472 [9:59:21<18:49:04, 3.63it/s] 34%|███▍ | 125610/371472 [9:59:21<18:48:22, 3.63it/s] 34%|███▍ | 125611/371472 [9:59:22<19:48:15, 3.45it/s] 34%|███▍ | 125612/371472 [9:59:22<19:37:12, 3.48it/s] 34%|███▍ | 125613/371472 [9:59:22<20:38:17, 3.31it/s] 34%|███▍ | 125614/371472 [9:59:23<20:29:40, 3.33it/s] 34%|███▍ | 125615/371472 [9:59:23<20:22:30, 3.35it/s] 34%|███▍ | 125616/371472 [9:59:23<22:56:21, 2.98it/s] 34%|███▍ | 125617/371472 [9:59:24<23:06:52, 2.95it/s] 34%|███▍ | 125618/371472 [9:59:24<21:22:04, 3.20it/s] 34%|███▍ | 125619/371472 [9:59:24<21:12:52, 3.22it/s] 34%|███▍ | 125620/371472 [9:59:25<20:53:02, 3.27it/s] {'loss': 3.1769, 'learning_rate': 6.959695317718545e-07, 'epoch': 5.41} + 34%|███▍ | 125620/371472 [9:59:25<20:53:02, 3.27it/s] 34%|███▍ | 125621/371472 [9:59:25<19:49:29, 3.44it/s] 34%|███▍ | 125622/371472 [9:59:25<19:16:08, 3.54it/s] 34%|███▍ | 125623/371472 [9:59:25<18:16:51, 3.74it/s] 34%|███▍ | 125624/371472 [9:59:26<17:33:04, 3.89it/s] 34%|███▍ | 125625/371472 [9:59:26<17:53:05, 3.82it/s] 34%|███▍ | 125626/371472 [9:59:26<18:31:00, 3.69it/s] 34%|███▍ | 125627/371472 [9:59:26<18:45:51, 3.64it/s] 34%|███▍ | 125628/371472 [9:59:27<18:32:27, 3.68it/s] 34%|███▍ | 125629/371472 [9:59:27<18:53:03, 3.62it/s] 34%|███▍ | 125630/371472 [9:59:27<18:04:29, 3.78it/s] 34%|███▍ | 125631/371472 [9:59:28<18:24:48, 3.71it/s] 34%|███▍ | 125632/371472 [9:59:28<18:01:20, 3.79it/s] 34%|███▍ | 125633/371472 [9:59:28<18:23:28, 3.71it/s] 34%|███▍ | 125634/371472 [9:59:28<19:18:21, 3.54it/s] 34%|███▍ | 125635/371472 [9:59:29<18:40:36, 3.66it/s] 34%|███▍ | 125636/371472 [9:59:29<18:06:16, 3.77it/s] 34%|███▍ | 125637/371472 [9:59:29<19:15:48, 3.54it/s] 34%|███▍ | 125638/371472 [9:59:30<19:47:53, 3.45it/s] 34%|███▍ | 125639/371472 [9:59:30<19:23:02, 3.52it/s] 34%|███▍ | 125640/371472 [9:59:30<19:47:46, 3.45it/s] {'loss': 3.291, 'learning_rate': 6.959210497963757e-07, 'epoch': 5.41} + 34%|███▍ | 125640/371472 [9:59:30<19:47:46, 3.45it/s] 34%|███▍ | 125641/371472 [9:59:30<19:20:37, 3.53it/s] 34%|███▍ | 125642/371472 [9:59:31<19:36:23, 3.48it/s] 34%|███▍ | 125643/371472 [9:59:31<19:41:03, 3.47it/s] 34%|███▍ | 125644/371472 [9:59:31<19:46:45, 3.45it/s] 34%|███▍ | 125645/371472 [9:59:31<18:51:12, 3.62it/s] 34%|███▍ | 125646/371472 [9:59:32<19:42:27, 3.46it/s] 34%|███▍ | 125647/371472 [9:59:32<20:57:07, 3.26it/s] 34%|███▍ | 125648/371472 [9:59:32<19:45:50, 3.45it/s] 34%|███▍ | 125649/371472 [9:59:33<18:59:37, 3.60it/s] 34%|███▍ | 125650/371472 [9:59:33<18:48:03, 3.63it/s] 34%|███▍ | 125651/371472 [9:59:33<18:43:57, 3.65it/s] 34%|███▍ | 125652/371472 [9:59:33<19:20:02, 3.53it/s] 34%|███▍ | 125653/371472 [9:59:34<19:16:03, 3.54it/s] 34%|███▍ | 125654/371472 [9:59:34<19:02:01, 3.59it/s] 34%|███▍ | 125655/371472 [9:59:34<21:30:10, 3.18it/s] 34%|███▍ | 125656/371472 [9:59:35<20:00:03, 3.41it/s] 34%|███▍ | 125657/371472 [9:59:35<20:24:56, 3.34it/s] 34%|███▍ | 125658/371472 [9:59:35<20:05:45, 3.40it/s] 34%|███▍ | 125659/371472 [9:59:36<19:41:51, 3.47it/s] 34%|███▍ | 125660/371472 [9:59:36<21:28:25, 3.18it/s] {'loss': 3.1185, 'learning_rate': 6.958725678208968e-07, 'epoch': 5.41} + 34%|███▍ | 125660/371472 [9:59:36<21:28:25, 3.18it/s] 34%|███▍ | 125661/371472 [9:59:36<20:24:10, 3.35it/s] 34%|███▍ | 125662/371472 [9:59:36<19:59:31, 3.42it/s] 34%|███▍ | 125663/371472 [9:59:37<19:19:52, 3.53it/s] 34%|███▍ | 125664/371472 [9:59:37<20:20:55, 3.36it/s] 34%|███▍ | 125665/371472 [9:59:37<20:34:53, 3.32it/s] 34%|███▍ | 125666/371472 [9:59:38<19:53:42, 3.43it/s] 34%|███▍ | 125667/371472 [9:59:38<19:35:32, 3.48it/s] 34%|███▍ | 125668/371472 [9:59:38<20:01:18, 3.41it/s] 34%|███▍ | 125669/371472 [9:59:39<20:07:23, 3.39it/s] 34%|███▍ | 125670/371472 [9:59:39<20:09:27, 3.39it/s] 34%|███▍ | 125671/371472 [9:59:39<20:10:42, 3.38it/s] 34%|███▍ | 125672/371472 [9:59:39<20:39:59, 3.30it/s] 34%|███▍ | 125673/371472 [9:59:40<19:53:37, 3.43it/s] 34%|███▍ | 125674/371472 [9:59:40<19:14:52, 3.55it/s] 34%|███▍ | 125675/371472 [9:59:40<19:14:58, 3.55it/s] 34%|███▍ | 125676/371472 [9:59:41<18:53:21, 3.61it/s] 34%|███▍ | 125677/371472 [9:59:41<18:14:12, 3.74it/s] 34%|███▍ | 125678/371472 [9:59:41<18:06:47, 3.77it/s] 34%|███▍ | 125679/371472 [9:59:41<19:24:07, 3.52it/s] 34%|███▍ | 125680/371472 [9:59:42<18:46:42, 3.64it/s] {'loss': 3.138, 'learning_rate': 6.958240858454179e-07, 'epoch': 5.41} + 34%|███▍ | 125680/371472 [9:59:42<18:46:42, 3.64it/s] 34%|███▍ | 125681/371472 [9:59:42<18:41:23, 3.65it/s] 34%|███▍ | 125682/371472 [9:59:42<19:11:48, 3.56it/s] 34%|███▍ | 125683/371472 [9:59:42<18:21:13, 3.72it/s] 34%|███▍ | 125684/371472 [9:59:43<20:33:15, 3.32it/s] 34%|███▍ | 125685/371472 [9:59:43<19:42:35, 3.46it/s] 34%|███▍ | 125686/371472 [9:59:43<20:06:58, 3.39it/s] 34%|███▍ | 125687/371472 [9:59:44<20:55:10, 3.26it/s] 34%|███▍ | 125688/371472 [9:59:44<19:23:02, 3.52it/s] 34%|███▍ | 125689/371472 [9:59:44<19:24:54, 3.52it/s] 34%|███▍ | 125690/371472 [9:59:44<19:19:57, 3.53it/s] 34%|███▍ | 125691/371472 [9:59:45<18:54:40, 3.61it/s] 34%|███▍ | 125692/371472 [9:59:45<18:32:40, 3.68it/s] 34%|███▍ | 125693/371472 [9:59:45<18:27:19, 3.70it/s] 34%|███▍ | 125694/371472 [9:59:46<19:29:43, 3.50it/s] 34%|███▍ | 125695/371472 [9:59:46<19:07:56, 3.57it/s] 34%|███▍ | 125696/371472 [9:59:46<18:30:11, 3.69it/s] 34%|███▍ | 125697/371472 [9:59:46<18:12:05, 3.75it/s] 34%|███▍ | 125698/371472 [9:59:47<18:21:59, 3.72it/s] 34%|███▍ | 125699/371472 [9:59:47<17:54:41, 3.81it/s] 34%|███▍ | 125700/371472 [9:59:47<18:33:03, 3.68it/s] {'loss': 3.2235, 'learning_rate': 6.95775603869939e-07, 'epoch': 5.41} + 34%|███▍ | 125700/371472 [9:59:47<18:33:03, 3.68it/s] 34%|███▍ | 125701/371472 [9:59:47<18:11:55, 3.75it/s] 34%|███▍ | 125702/371472 [9:59:48<20:08:46, 3.39it/s] 34%|███▍ | 125703/371472 [9:59:48<19:43:40, 3.46it/s] 34%|███▍ | 125704/371472 [9:59:48<19:16:03, 3.54it/s] 34%|███▍ | 125705/371472 [9:59:49<18:36:32, 3.67it/s] 34%|███▍ | 125706/371472 [9:59:49<18:40:51, 3.65it/s] 34%|███▍ | 125707/371472 [9:59:49<18:22:37, 3.71it/s] 34%|███▍ | 125708/371472 [9:59:50<22:27:30, 3.04it/s] 34%|███▍ | 125709/371472 [9:59:50<20:49:38, 3.28it/s] 34%|███▍ | 125710/371472 [9:59:50<20:31:52, 3.33it/s] 34%|███▍ | 125711/371472 [9:59:50<19:36:19, 3.48it/s] 34%|███▍ | 125712/371472 [9:59:51<19:42:57, 3.46it/s] 34%|███▍ | 125713/371472 [9:59:51<19:24:49, 3.52it/s] 34%|███▍ | 125714/371472 [9:59:51<19:03:40, 3.58it/s] 34%|███▍ | 125715/371472 [9:59:51<18:28:03, 3.70it/s] 34%|███▍ | 125716/371472 [9:59:52<18:12:48, 3.75it/s] 34%|███▍ | 125717/371472 [9:59:52<17:42:11, 3.86it/s] 34%|███▍ | 125718/371472 [9:59:52<19:49:04, 3.44it/s] 34%|███▍ | 125719/371472 [9:59:53<19:44:55, 3.46it/s] 34%|███▍ | 125720/371472 [9:59:53<18:58:00, 3.60it/s] {'loss': 3.3726, 'learning_rate': 6.957271218944601e-07, 'epoch': 5.41} + 34%|███▍ | 125720/371472 [9:59:53<18:58:00, 3.60it/s] 34%|███▍ | 125721/371472 [9:59:53<18:59:06, 3.60it/s] 34%|███▍ | 125722/371472 [9:59:53<18:58:09, 3.60it/s] 34%|███▍ | 125723/371472 [9:59:54<18:43:10, 3.65it/s] 34%|███▍ | 125724/371472 [9:59:54<18:25:24, 3.71it/s] 34%|███▍ | 125725/371472 [9:59:54<19:22:17, 3.52it/s] 34%|███▍ | 125726/371472 [9:59:55<19:22:54, 3.52it/s] 34%|███▍ | 125727/371472 [9:59:55<18:38:03, 3.66it/s] 34%|███▍ | 125728/371472 [9:59:55<18:53:25, 3.61it/s] 34%|███▍ | 125729/371472 [9:59:55<18:20:00, 3.72it/s] 34%|███▍ | 125730/371472 [9:59:56<17:27:53, 3.91it/s] 34%|███▍ | 125731/371472 [9:59:56<18:55:47, 3.61it/s] 34%|███▍ | 125732/371472 [9:59:56<18:29:32, 3.69it/s] 34%|███▍ | 125733/371472 [9:59:56<18:29:11, 3.69it/s] 34%|███▍ | 125734/371472 [9:59:57<18:31:45, 3.68it/s] 34%|███▍ | 125735/371472 [9:59:57<20:01:21, 3.41it/s] 34%|███▍ | 125736/371472 [9:59:57<20:16:08, 3.37it/s] 34%|███▍ | 125737/371472 [9:59:58<19:20:49, 3.53it/s] 34%|███▍ | 125738/371472 [9:59:58<19:36:30, 3.48it/s] 34%|███▍ | 125739/371472 [9:59:58<19:37:21, 3.48it/s] 34%|███▍ | 125740/371472 [9:59:58<19:30:35, 3.50it/s] {'loss': 3.2151, 'learning_rate': 6.956786399189812e-07, 'epoch': 5.42} + 34%|███▍ | 125740/371472 [9:59:58<19:30:35, 3.50it/s] 34%|███▍ | 125741/371472 [9:59:59<21:04:28, 3.24it/s] 34%|███▍ | 125742/371472 [9:59:59<22:02:19, 3.10it/s] 34%|███▍ | 125743/371472 [9:59:59<20:41:26, 3.30it/s] 34%|███▍ | 125744/371472 [10:00:00<19:21:45, 3.53it/s] 34%|███▍ | 125745/371472 [10:00:00<19:06:48, 3.57it/s] 34%|███▍ | 125746/371472 [10:00:00<18:22:47, 3.71it/s] 34%|███▍ | 125747/371472 [10:00:00<17:42:59, 3.85it/s] 34%|███▍ | 125748/371472 [10:00:01<19:40:42, 3.47it/s] 34%|███▍ | 125749/371472 [10:00:01<19:37:31, 3.48it/s] 34%|███▍ | 125750/371472 [10:00:01<19:33:00, 3.49it/s] 34%|███▍ | 125751/371472 [10:00:02<19:22:07, 3.52it/s] 34%|███▍ | 125752/371472 [10:00:02<18:47:05, 3.63it/s] 34%|███▍ | 125753/371472 [10:00:02<18:06:42, 3.77it/s] 34%|███▍ | 125754/371472 [10:00:02<18:17:26, 3.73it/s] 34%|███▍ | 125755/371472 [10:00:03<18:33:00, 3.68it/s] 34%|███▍ | 125756/371472 [10:00:03<18:36:55, 3.67it/s] 34%|███▍ | 125757/371472 [10:00:03<17:44:59, 3.85it/s] 34%|███▍ | 125758/371472 [10:00:03<17:45:14, 3.84it/s] 34%|███▍ | 125759/371472 [10:00:04<19:44:12, 3.46it/s] 34%|███▍ | 125760/371472 [10:00:04<20:23:21, 3.35it/s] {'loss': 3.1706, 'learning_rate': 6.956301579435022e-07, 'epoch': 5.42} + 34%|███▍ | 125760/371472 [10:00:04<20:23:21, 3.35it/s] 34%|███▍ | 125761/371472 [10:00:04<19:49:42, 3.44it/s] 34%|███▍ | 125762/371472 [10:00:05<19:21:52, 3.52it/s] 34%|███▍ | 125763/371472 [10:00:05<19:27:02, 3.51it/s] 34%|███▍ | 125764/371472 [10:00:05<18:43:24, 3.65it/s] 34%|███▍ | 125765/371472 [10:00:05<18:47:26, 3.63it/s] 34%|███▍ | 125766/371472 [10:00:06<18:40:51, 3.65it/s] 34%|███▍ | 125767/371472 [10:00:06<18:48:38, 3.63it/s] 34%|███▍ | 125768/371472 [10:00:06<19:32:35, 3.49it/s] 34%|███▍ | 125769/371472 [10:00:07<18:36:44, 3.67it/s] 34%|███▍ | 125770/371472 [10:00:07<19:23:04, 3.52it/s] 34%|███▍ | 125771/371472 [10:00:07<21:15:39, 3.21it/s] 34%|███▍ | 125772/371472 [10:00:08<21:34:08, 3.16it/s] 34%|███▍ | 125773/371472 [10:00:08<20:13:57, 3.37it/s] 34%|███▍ | 125774/371472 [10:00:08<19:38:24, 3.48it/s] 34%|███▍ | 125775/371472 [10:00:08<19:47:20, 3.45it/s] 34%|███▍ | 125776/371472 [10:00:09<19:53:53, 3.43it/s] 34%|███▍ | 125777/371472 [10:00:09<19:43:54, 3.46it/s] 34%|███▍ | 125778/371472 [10:00:09<18:41:18, 3.65it/s] 34%|███▍ | 125779/371472 [10:00:09<17:57:21, 3.80it/s] 34%|███▍ | 125780/371472 [10:00:10<18:33:14, 3.68it/s] {'loss': 3.3046, 'learning_rate': 6.955816759680234e-07, 'epoch': 5.42} + 34%|███▍ | 125780/371472 [10:00:10<18:33:14, 3.68it/s] 34%|███▍ | 125781/371472 [10:00:10<18:16:50, 3.73it/s] 34%|███▍ | 125782/371472 [10:00:10<18:55:44, 3.61it/s] 34%|███▍ | 125783/371472 [10:00:11<21:04:23, 3.24it/s] 34%|███▍ | 125784/371472 [10:00:11<19:53:03, 3.43it/s] 34%|███▍ | 125785/371472 [10:00:11<19:09:25, 3.56it/s] 34%|███▍ | 125786/371472 [10:00:11<18:26:05, 3.70it/s] 34%|███▍ | 125787/371472 [10:00:12<18:34:45, 3.67it/s] 34%|███▍ | 125788/371472 [10:00:12<18:39:25, 3.66it/s] 34%|███▍ | 125789/371472 [10:00:12<18:05:48, 3.77it/s] 34%|███▍ | 125790/371472 [10:00:13<18:32:15, 3.68it/s] 34%|███▍ | 125791/371472 [10:00:13<18:02:27, 3.78it/s] 34%|███▍ | 125792/371472 [10:00:13<17:42:57, 3.85it/s] 34%|███▍ | 125793/371472 [10:00:13<17:40:15, 3.86it/s] 34%|███▍ | 125794/371472 [10:00:14<17:34:20, 3.88it/s] 34%|███▍ | 125795/371472 [10:00:14<18:02:06, 3.78it/s] 34%|███▍ | 125796/371472 [10:00:14<17:49:52, 3.83it/s] 34%|███▍ | 125797/371472 [10:00:14<17:42:05, 3.86it/s] 34%|███▍ | 125798/371472 [10:00:15<17:53:47, 3.81it/s] 34%|███▍ | 125799/371472 [10:00:15<17:59:10, 3.79it/s] 34%|███▍ | 125800/371472 [10:00:15<18:22:50, 3.71it/s] {'loss': 3.2669, 'learning_rate': 6.955331939925446e-07, 'epoch': 5.42} + 34%|███▍ | 125800/371472 [10:00:15<18:22:50, 3.71it/s] 34%|███▍ | 125801/371472 [10:00:15<17:55:15, 3.81it/s] 34%|███▍ | 125802/371472 [10:00:16<17:42:04, 3.86it/s] 34%|███▍ | 125803/371472 [10:00:16<17:21:30, 3.93it/s] 34%|███▍ | 125804/371472 [10:00:16<17:53:22, 3.81it/s] 34%|███▍ | 125805/371472 [10:00:16<18:01:52, 3.78it/s] 34%|███▍ | 125806/371472 [10:00:17<17:40:43, 3.86it/s] 34%|███▍ | 125807/371472 [10:00:17<17:49:10, 3.83it/s] 34%|███▍ | 125808/371472 [10:00:17<18:21:34, 3.72it/s] 34%|███▍ | 125809/371472 [10:00:18<19:14:38, 3.55it/s] 34%|███▍ | 125810/371472 [10:00:18<18:25:23, 3.70it/s] 34%|███▍ | 125811/371472 [10:00:18<17:50:14, 3.83it/s] 34%|███▍ | 125812/371472 [10:00:18<18:01:58, 3.78it/s] 34%|███▍ | 125813/371472 [10:00:19<18:14:07, 3.74it/s] 34%|███▍ | 125814/371472 [10:00:19<17:41:50, 3.86it/s] 34%|███▍ | 125815/371472 [10:00:19<17:06:49, 3.99it/s] 34%|███▍ | 125816/371472 [10:00:19<16:59:20, 4.02it/s] 34%|███▍ | 125817/371472 [10:00:20<18:08:30, 3.76it/s] 34%|███▍ | 125818/371472 [10:00:20<17:53:01, 3.82it/s] 34%|███▍ | 125819/371472 [10:00:20<18:01:00, 3.79it/s] 34%|███▍ | 125820/371472 [10:00:20<18:02:11, 3.78it/s] {'loss': 3.2547, 'learning_rate': 6.954847120170657e-07, 'epoch': 5.42} + 34%|███▍ | 125820/371472 [10:00:20<18:02:11, 3.78it/s] 34%|███▍ | 125821/371472 [10:00:21<18:07:58, 3.76it/s] 34%|███▍ | 125822/371472 [10:00:21<17:58:32, 3.80it/s] 34%|███▍ | 125823/371472 [10:00:21<18:08:55, 3.76it/s] 34%|███▍ | 125824/371472 [10:00:21<17:44:27, 3.85it/s] 34%|███▍ | 125825/371472 [10:00:22<18:00:08, 3.79it/s] 34%|███▍ | 125826/371472 [10:00:22<18:23:21, 3.71it/s] 34%|███▍ | 125827/371472 [10:00:22<18:08:53, 3.76it/s] 34%|███▍ | 125828/371472 [10:00:23<19:20:27, 3.53it/s] 34%|███▍ | 125829/371472 [10:00:23<19:21:43, 3.52it/s] 34%|███▍ | 125830/371472 [10:00:23<18:31:46, 3.68it/s] 34%|███▍ | 125831/371472 [10:00:23<19:11:54, 3.55it/s] 34%|███▍ | 125832/371472 [10:00:24<18:43:59, 3.64it/s] 34%|███▍ | 125833/371472 [10:00:24<19:00:18, 3.59it/s] 34%|███▍ | 125834/371472 [10:00:24<19:05:04, 3.58it/s] 34%|███▍ | 125835/371472 [10:00:25<18:58:22, 3.60it/s] 34%|███▍ | 125836/371472 [10:00:25<19:19:35, 3.53it/s] 34%|███▍ | 125837/371472 [10:00:25<19:07:36, 3.57it/s] 34%|███▍ | 125838/371472 [10:00:25<18:30:42, 3.69it/s] 34%|███▍ | 125839/371472 [10:00:26<18:46:48, 3.63it/s] 34%|███▍ | 125840/371472 [10:00:26<19:01:20, 3.59it/s] {'loss': 3.2823, 'learning_rate': 6.954362300415866e-07, 'epoch': 5.42} + 34%|███▍ | 125840/371472 [10:00:26<19:01:20, 3.59it/s] 34%|███▍ | 125841/371472 [10:00:26<19:49:45, 3.44it/s] 34%|███▍ | 125842/371472 [10:00:27<20:30:58, 3.33it/s] 34%|███▍ | 125843/371472 [10:00:27<20:05:26, 3.40it/s] 34%|███▍ | 125844/371472 [10:00:27<19:35:35, 3.48it/s] 34%|███▍ | 125845/371472 [10:00:27<19:12:41, 3.55it/s] 34%|███▍ | 125846/371472 [10:00:28<18:47:37, 3.63it/s] 34%|███▍ | 125847/371472 [10:00:28<19:26:00, 3.51it/s] 34%|███▍ | 125848/371472 [10:00:28<19:33:44, 3.49it/s] 34%|███▍ | 125849/371472 [10:00:29<21:30:56, 3.17it/s] 34%|███▍ | 125850/371472 [10:00:29<20:37:42, 3.31it/s] 34%|███▍ | 125851/371472 [10:00:29<20:11:53, 3.38it/s] 34%|███▍ | 125852/371472 [10:00:29<19:29:51, 3.50it/s] 34%|███▍ | 125853/371472 [10:00:30<19:24:26, 3.52it/s] 34%|███▍ | 125854/371472 [10:00:30<19:11:31, 3.55it/s] 34%|███▍ | 125855/371472 [10:00:30<20:15:16, 3.37it/s] 34%|███▍ | 125856/371472 [10:00:31<21:14:08, 3.21it/s] 34%|███▍ | 125857/371472 [10:00:31<20:40:52, 3.30it/s] 34%|███▍ | 125858/371472 [10:00:31<20:12:21, 3.38it/s] 34%|███▍ | 125859/371472 [10:00:32<20:47:26, 3.28it/s] 34%|███▍ | 125860/371472 [10:00:32<20:07:11, 3.39it/s] {'loss': 3.2155, 'learning_rate': 6.953877480661078e-07, 'epoch': 5.42} + 34%|███▍ | 125860/371472 [10:00:32<20:07:11, 3.39it/s] 34%|███▍ | 125861/371472 [10:00:32<19:37:51, 3.48it/s] 34%|███▍ | 125862/371472 [10:00:32<18:49:10, 3.63it/s] 34%|███▍ | 125863/371472 [10:00:33<18:52:03, 3.62it/s] 34%|███▍ | 125864/371472 [10:00:33<18:23:57, 3.71it/s] 34%|███▍ | 125865/371472 [10:00:33<19:45:07, 3.45it/s] 34%|███▍ | 125866/371472 [10:00:33<18:48:58, 3.63it/s] 34%|███▍ | 125867/371472 [10:00:34<19:58:47, 3.41it/s] 34%|███▍ | 125868/371472 [10:00:34<18:54:37, 3.61it/s] 34%|███▍ | 125869/371472 [10:00:34<18:37:57, 3.66it/s] 34%|███▍ | 125870/371472 [10:00:35<18:27:27, 3.70it/s] 34%|███▍ | 125871/371472 [10:00:35<18:24:20, 3.71it/s] 34%|███▍ | 125872/371472 [10:00:35<17:53:33, 3.81it/s] 34%|███▍ | 125873/371472 [10:00:35<18:06:42, 3.77it/s] 34%|███▍ | 125874/371472 [10:00:36<18:44:58, 3.64it/s] 34%|███▍ | 125875/371472 [10:00:36<19:23:58, 3.52it/s] 34%|███▍ | 125876/371472 [10:00:36<18:35:02, 3.67it/s] 34%|███▍ | 125877/371472 [10:00:36<18:23:03, 3.71it/s] 34%|███▍ | 125878/371472 [10:00:37<17:55:35, 3.81it/s] 34%|███▍ | 125879/371472 [10:00:37<17:28:57, 3.90it/s] 34%|███▍ | 125880/371472 [10:00:37<17:24:47, 3.92it/s] {'loss': 3.4131, 'learning_rate': 6.95339266090629e-07, 'epoch': 5.42} + 34%|███▍ | 125880/371472 [10:00:37<17:24:47, 3.92it/s] 34%|███▍ | 125881/371472 [10:00:37<17:18:48, 3.94it/s] 34%|███▍ | 125882/371472 [10:00:38<17:44:18, 3.85it/s] 34%|███▍ | 125883/371472 [10:00:38<18:09:49, 3.76it/s] 34%|███▍ | 125884/371472 [10:00:38<18:46:33, 3.63it/s] 34%|███▍ | 125885/371472 [10:00:39<19:01:29, 3.59it/s] 34%|███▍ | 125886/371472 [10:00:39<19:08:16, 3.56it/s] 34%|███▍ | 125887/371472 [10:00:39<18:58:18, 3.60it/s] 34%|███▍ | 125888/371472 [10:00:39<18:32:39, 3.68it/s] 34%|███▍ | 125889/371472 [10:00:40<19:07:17, 3.57it/s] 34%|███▍ | 125890/371472 [10:00:40<18:59:49, 3.59it/s] 34%|███▍ | 125891/371472 [10:00:40<18:16:22, 3.73it/s] 34%|███▍ | 125892/371472 [10:00:40<18:00:13, 3.79it/s] 34%|███▍ | 125893/371472 [10:00:41<19:21:01, 3.53it/s] 34%|███▍ | 125894/371472 [10:00:41<18:54:54, 3.61it/s] 34%|███▍ | 125895/371472 [10:00:41<18:55:02, 3.61it/s] 34%|███▍ | 125896/371472 [10:00:42<19:20:22, 3.53it/s] 34%|███▍ | 125897/371472 [10:00:42<18:48:06, 3.63it/s] 34%|███▍ | 125898/371472 [10:00:42<18:24:11, 3.71it/s] 34%|███▍ | 125899/371472 [10:00:42<19:13:07, 3.55it/s] 34%|███▍ | 125900/371472 [10:00:43<18:50:03, 3.62it/s] {'loss': 3.2091, 'learning_rate': 6.952907841151501e-07, 'epoch': 5.42} + 34%|███▍ | 125900/371472 [10:00:43<18:50:03, 3.62it/s] 34%|███▍ | 125901/371472 [10:00:43<19:25:04, 3.51it/s] 34%|███▍ | 125902/371472 [10:00:43<18:22:15, 3.71it/s] 34%|███▍ | 125903/371472 [10:00:44<20:01:07, 3.41it/s] 34%|███▍ | 125904/371472 [10:00:44<20:44:46, 3.29it/s] 34%|███▍ | 125905/371472 [10:00:44<22:24:53, 3.04it/s] 34%|███▍ | 125906/371472 [10:00:45<22:01:21, 3.10it/s] 34%|███▍ | 125907/371472 [10:00:45<20:55:07, 3.26it/s] 34%|███▍ | 125908/371472 [10:00:45<19:24:51, 3.51it/s] 34%|███▍ | 125909/371472 [10:00:46<27:41:04, 2.46it/s] 34%|███▍ | 125910/371472 [10:00:46<25:07:36, 2.71it/s] 34%|███▍ | 125911/371472 [10:00:46<23:01:24, 2.96it/s] 34%|███▍ | 125912/371472 [10:00:47<21:40:18, 3.15it/s] 34%|███▍ | 125913/371472 [10:00:47<21:23:04, 3.19it/s] 34%|███▍ | 125914/371472 [10:00:47<20:48:41, 3.28it/s] 34%|███▍ | 125915/371472 [10:00:47<20:09:57, 3.38it/s] 34%|███▍ | 125916/371472 [10:00:48<19:07:06, 3.57it/s] 34%|███▍ | 125917/371472 [10:00:48<20:31:22, 3.32it/s] 34%|███▍ | 125918/371472 [10:00:48<20:12:41, 3.37it/s] 34%|███▍ | 125919/371472 [10:00:49<19:37:53, 3.47it/s] 34%|███▍ | 125920/371472 [10:00:49<19:20:02, 3.53it/s] {'loss': 3.246, 'learning_rate': 6.952423021396711e-07, 'epoch': 5.42} + 34%|███▍ | 125920/371472 [10:00:49<19:20:02, 3.53it/s] 34%|███▍ | 125921/371472 [10:00:49<21:47:59, 3.13it/s] 34%|███▍ | 125922/371472 [10:00:50<21:21:55, 3.19it/s] 34%|███▍ | 125923/371472 [10:00:50<20:32:40, 3.32it/s] 34%|███▍ | 125924/371472 [10:00:50<20:00:59, 3.41it/s] 34%|███▍ | 125925/371472 [10:00:50<19:10:50, 3.56it/s] 34%|███▍ | 125926/371472 [10:00:51<19:12:47, 3.55it/s] 34%|███▍ | 125927/371472 [10:00:51<19:41:21, 3.46it/s] 34%|███▍ | 125928/371472 [10:00:51<19:05:50, 3.57it/s] 34%|███▍ | 125929/371472 [10:00:51<18:14:00, 3.74it/s] 34%|███▍ | 125930/371472 [10:00:52<17:55:20, 3.81it/s] 34%|███▍ | 125931/371472 [10:00:52<19:57:43, 3.42it/s] 34%|███▍ | 125932/371472 [10:00:52<21:17:21, 3.20it/s] 34%|███▍ | 125933/371472 [10:00:53<19:56:18, 3.42it/s] 34%|███▍ | 125934/371472 [10:00:53<20:17:07, 3.36it/s] 34%|███▍ | 125935/371472 [10:00:53<20:53:26, 3.26it/s] 34%|███▍ | 125936/371472 [10:00:54<21:58:11, 3.10it/s] 34%|███▍ | 125937/371472 [10:00:54<20:39:56, 3.30it/s] 34%|███▍ | 125938/371472 [10:00:54<20:14:38, 3.37it/s] 34%|███▍ | 125939/371472 [10:00:55<21:04:41, 3.24it/s] 34%|███▍ | 125940/371472 [10:00:55<20:29:26, 3.33it/s] {'loss': 3.0609, 'learning_rate': 6.951938201641924e-07, 'epoch': 5.42} + 34%|███▍ | 125940/371472 [10:00:55<20:29:26, 3.33it/s] 34%|███▍ | 125941/371472 [10:00:55<20:16:26, 3.36it/s] 34%|███▍ | 125942/371472 [10:00:55<19:43:15, 3.46it/s] 34%|███▍ | 125943/371472 [10:00:56<19:32:45, 3.49it/s] 34%|███▍ | 125944/371472 [10:00:56<20:38:32, 3.30it/s] 34%|███▍ | 125945/371472 [10:00:56<21:03:17, 3.24it/s] 34%|███▍ | 125946/371472 [10:00:57<20:50:32, 3.27it/s] 34%|███▍ | 125947/371472 [10:00:57<20:11:59, 3.38it/s] 34%|███▍ | 125948/371472 [10:00:57<19:40:46, 3.47it/s] 34%|███▍ | 125949/371472 [10:00:57<19:04:11, 3.58it/s] 34%|███▍ | 125950/371472 [10:00:58<18:50:32, 3.62it/s] 34%|███▍ | 125951/371472 [10:00:58<19:02:19, 3.58it/s] 34%|███▍ | 125952/371472 [10:00:58<19:11:48, 3.55it/s] 34%|███▍ | 125953/371472 [10:00:59<18:40:47, 3.65it/s] 34%|███▍ | 125954/371472 [10:00:59<19:31:50, 3.49it/s] 34%|███▍ | 125955/371472 [10:00:59<18:55:42, 3.60it/s] 34%|███▍ | 125956/371472 [10:00:59<19:14:59, 3.54it/s] 34%|███▍ | 125957/371472 [10:01:00<18:44:46, 3.64it/s] 34%|███▍ | 125958/371472 [10:01:00<19:39:23, 3.47it/s] 34%|███▍ | 125959/371472 [10:01:00<19:22:34, 3.52it/s] 34%|███▍ | 125960/371472 [10:01:01<18:48:54, 3.62it/s] {'loss': 3.3931, 'learning_rate': 6.951453381887134e-07, 'epoch': 5.43} + 34%|███▍ | 125960/371472 [10:01:01<18:48:54, 3.62it/s] 34%|███▍ | 125961/371472 [10:01:01<18:30:48, 3.68it/s] 34%|███▍ | 125962/371472 [10:01:01<18:24:19, 3.71it/s] 34%|███▍ | 125963/371472 [10:01:01<18:22:12, 3.71it/s] 34%|███▍ | 125964/371472 [10:01:02<18:08:15, 3.76it/s] 34%|███▍ | 125965/371472 [10:01:02<17:42:07, 3.85it/s] 34%|███▍ | 125966/371472 [10:01:02<17:45:25, 3.84it/s] 34%|███▍ | 125967/371472 [10:01:02<18:01:22, 3.78it/s] 34%|███▍ | 125968/371472 [10:01:03<18:58:16, 3.59it/s] 34%|███▍ | 125969/371472 [10:01:03<19:14:31, 3.54it/s] 34%|███▍ | 125970/371472 [10:01:03<20:37:58, 3.31it/s] 34%|███▍ | 125971/371472 [10:01:04<19:47:23, 3.45it/s] 34%|███▍ | 125972/371472 [10:01:04<18:49:28, 3.62it/s] 34%|███▍ | 125973/371472 [10:01:04<18:41:23, 3.65it/s] 34%|███▍ | 125974/371472 [10:01:04<18:40:57, 3.65it/s] 34%|███▍ | 125975/371472 [10:01:05<18:23:09, 3.71it/s] 34%|███▍ | 125976/371472 [10:01:05<18:48:12, 3.63it/s] 34%|███▍ | 125977/371472 [10:01:05<20:09:13, 3.38it/s] 34%|███▍ | 125978/371472 [10:01:06<20:06:00, 3.39it/s] 34%|███▍ | 125979/371472 [10:01:06<20:21:03, 3.35it/s] 34%|███▍ | 125980/371472 [10:01:06<21:01:37, 3.24it/s] {'loss': 3.1498, 'learning_rate': 6.950968562132345e-07, 'epoch': 5.43} + 34%|███▍ | 125980/371472 [10:01:06<21:01:37, 3.24it/s] 34%|███▍ | 125981/371472 [10:01:06<20:29:11, 3.33it/s] 34%|███▍ | 125982/371472 [10:01:07<19:33:07, 3.49it/s] 34%|███▍ | 125983/371472 [10:01:07<18:40:50, 3.65it/s] 34%|███▍ | 125984/371472 [10:01:07<19:55:24, 3.42it/s] 34%|███▍ | 125985/371472 [10:01:08<18:43:21, 3.64it/s] 34%|███▍ | 125986/371472 [10:01:08<18:40:09, 3.65it/s] 34%|███▍ | 125987/371472 [10:01:08<20:09:26, 3.38it/s] 34%|███▍ | 125988/371472 [10:01:08<20:38:09, 3.30it/s] 34%|███▍ | 125989/371472 [10:01:09<19:46:38, 3.45it/s] 34%|███▍ | 125990/371472 [10:01:09<19:29:37, 3.50it/s] 34%|███▍ | 125991/371472 [10:01:09<18:35:39, 3.67it/s] 34%|███▍ | 125992/371472 [10:01:10<18:22:53, 3.71it/s] 34%|███▍ | 125993/371472 [10:01:10<20:42:21, 3.29it/s] 34%|███▍ | 125994/371472 [10:01:10<20:02:16, 3.40it/s] 34%|███▍ | 125995/371472 [10:01:10<19:21:58, 3.52it/s] 34%|███▍ | 125996/371472 [10:01:11<19:24:04, 3.51it/s] 34%|███▍ | 125997/371472 [10:01:11<18:48:14, 3.63it/s] 34%|███▍ | 125998/371472 [10:01:11<18:25:43, 3.70it/s] 34%|███▍ | 125999/371472 [10:01:12<19:31:25, 3.49it/s] 34%|███▍ | 126000/371472 [10:01:12<18:37:47, 3.66it/s] {'loss': 3.2485, 'learning_rate': 6.950483742377555e-07, 'epoch': 5.43} + 34%|███▍ | 126000/371472 [10:01:12<18:37:47, 3.66it/s] 34%|███▍ | 126001/371472 [10:01:12<17:56:24, 3.80it/s] 34%|███▍ | 126002/371472 [10:01:12<17:30:42, 3.89it/s] 34%|███▍ | 126003/371472 [10:01:13<17:15:54, 3.95it/s] 34%|███▍ | 126004/371472 [10:01:13<18:39:35, 3.65it/s] 34%|███▍ | 126005/371472 [10:01:13<20:06:00, 3.39it/s] 34%|███▍ | 126006/371472 [10:01:14<21:36:54, 3.15it/s] 34%|███▍ | 126007/371472 [10:01:14<20:42:34, 3.29it/s] 34%|███▍ | 126008/371472 [10:01:14<20:13:56, 3.37it/s] 34%|███▍ | 126009/371472 [10:01:14<19:16:08, 3.54it/s] 34%|███▍ | 126010/371472 [10:01:15<19:35:48, 3.48it/s] 34%|███▍ | 126011/371472 [10:01:15<22:27:24, 3.04it/s] 34%|███▍ | 126012/371472 [10:01:15<20:32:26, 3.32it/s] 34%|███▍ | 126013/371472 [10:01:16<20:03:35, 3.40it/s] 34%|███▍ | 126014/371472 [10:01:16<19:28:02, 3.50it/s] 34%|███▍ | 126015/371472 [10:01:16<19:20:19, 3.53it/s] 34%|███▍ | 126016/371472 [10:01:16<19:51:19, 3.43it/s] 34%|███▍ | 126017/371472 [10:01:17<18:47:08, 3.63it/s] 34%|███▍ | 126018/371472 [10:01:17<19:22:36, 3.52it/s] 34%|███▍ | 126019/371472 [10:01:17<19:21:35, 3.52it/s] 34%|███▍ | 126020/371472 [10:01:18<19:21:16, 3.52it/s] {'loss': 3.2079, 'learning_rate': 6.949998922622767e-07, 'epoch': 5.43} + 34%|███▍ | 126020/371472 [10:01:18<19:21:16, 3.52it/s] 34%|███▍ | 126021/371472 [10:01:18<20:25:34, 3.34it/s] 34%|███▍ | 126022/371472 [10:01:18<20:25:54, 3.34it/s] 34%|███▍ | 126023/371472 [10:01:18<20:02:35, 3.40it/s] 34%|███▍ | 126024/371472 [10:01:19<19:03:14, 3.58it/s] 34%|███▍ | 126025/371472 [10:01:19<17:58:08, 3.79it/s] 34%|███▍ | 126026/371472 [10:01:19<18:06:29, 3.77it/s] 34%|███▍ | 126027/371472 [10:01:20<19:05:20, 3.57it/s] 34%|███▍ | 126028/371472 [10:01:20<18:39:00, 3.66it/s] 34%|███▍ | 126029/371472 [10:01:20<18:27:21, 3.69it/s] 34%|███▍ | 126030/371472 [10:01:20<19:22:22, 3.52it/s] 34%|███▍ | 126031/371472 [10:01:21<19:12:51, 3.55it/s] 34%|███▍ | 126032/371472 [10:01:21<18:36:09, 3.66it/s] 34%|███▍ | 126033/371472 [10:01:21<19:59:41, 3.41it/s] 34%|███▍ | 126034/371472 [10:01:22<19:47:39, 3.44it/s] 34%|███▍ | 126035/371472 [10:01:22<18:54:23, 3.61it/s] 34%|███▍ | 126036/371472 [10:01:22<20:46:18, 3.28it/s] 34%|███▍ | 126037/371472 [10:01:22<20:17:59, 3.36it/s] 34%|███▍ | 126038/371472 [10:01:23<19:26:42, 3.51it/s] 34%|███▍ | 126039/371472 [10:01:23<20:51:48, 3.27it/s] 34%|███▍ | 126040/371472 [10:01:23<20:29:47, 3.33it/s] {'loss': 3.3722, 'learning_rate': 6.949514102867978e-07, 'epoch': 5.43} + 34%|███▍ | 126040/371472 [10:01:23<20:29:47, 3.33it/s] 34%|███▍ | 126041/371472 [10:01:24<19:20:10, 3.53it/s] 34%|███▍ | 126042/371472 [10:01:24<19:12:20, 3.55it/s] 34%|███▍ | 126043/371472 [10:01:24<19:07:29, 3.56it/s] 34%|███▍ | 126044/371472 [10:01:24<20:16:48, 3.36it/s] 34%|███▍ | 126045/371472 [10:01:25<19:20:28, 3.52it/s] 34%|███▍ | 126046/371472 [10:01:25<18:53:40, 3.61it/s] 34%|███▍ | 126047/371472 [10:01:25<18:33:12, 3.67it/s] 34%|███▍ | 126048/371472 [10:01:25<17:55:38, 3.80it/s] 34%|███▍ | 126049/371472 [10:01:26<18:26:50, 3.70it/s] 34%|███▍ | 126050/371472 [10:01:26<17:59:46, 3.79it/s] 34%|███▍ | 126051/371472 [10:01:26<17:25:19, 3.91it/s] 34%|███▍ | 126052/371472 [10:01:27<18:11:23, 3.75it/s] 34%|███▍ | 126053/371472 [10:01:27<18:47:10, 3.63it/s] 34%|███▍ | 126054/371472 [10:01:27<18:01:19, 3.78it/s] 34%|███▍ | 126055/371472 [10:01:27<17:28:23, 3.90it/s] 34%|███▍ | 126056/371472 [10:01:28<18:30:45, 3.68it/s] 34%|███▍ | 126057/371472 [10:01:28<19:14:30, 3.54it/s] 34%|███▍ | 126058/371472 [10:01:28<20:08:00, 3.39it/s] 34%|███▍ | 126059/371472 [10:01:29<19:06:30, 3.57it/s] 34%|███▍ | 126060/371472 [10:01:29<18:22:37, 3.71it/s] {'loss': 2.9532, 'learning_rate': 6.949029283113188e-07, 'epoch': 5.43} + 34%|███▍ | 126060/371472 [10:01:29<18:22:37, 3.71it/s] 34%|███▍ | 126061/371472 [10:01:29<17:47:42, 3.83it/s] 34%|███▍ | 126062/371472 [10:01:29<17:34:19, 3.88it/s] 34%|███▍ | 126063/371472 [10:01:30<17:29:15, 3.90it/s] 34%|███▍ | 126064/371472 [10:01:30<18:13:52, 3.74it/s] 34%|███▍ | 126065/371472 [10:01:30<18:45:03, 3.64it/s] 34%|███▍ | 126066/371472 [10:01:30<19:11:39, 3.55it/s] 34%|███▍ | 126067/371472 [10:01:31<18:51:18, 3.62it/s] 34%|███▍ | 126068/371472 [10:01:31<18:43:18, 3.64it/s] 34%|███▍ | 126069/371472 [10:01:31<18:37:37, 3.66it/s] 34%|███▍ | 126070/371472 [10:01:31<18:01:07, 3.78it/s] 34%|███▍ | 126071/371472 [10:01:32<17:18:59, 3.94it/s] 34%|███▍ | 126072/371472 [10:01:32<17:02:06, 4.00it/s] 34%|███▍ | 126073/371472 [10:01:32<17:01:35, 4.00it/s] 34%|███▍ | 126074/371472 [10:01:32<17:58:23, 3.79it/s] 34%|███▍ | 126075/371472 [10:01:33<17:46:49, 3.83it/s] 34%|███▍ | 126076/371472 [10:01:33<17:56:35, 3.80it/s] 34%|███▍ | 126077/371472 [10:01:33<17:59:20, 3.79it/s] 34%|███▍ | 126078/371472 [10:01:34<18:59:44, 3.59it/s] 34%|███▍ | 126079/371472 [10:01:34<20:03:01, 3.40it/s] 34%|███▍ | 126080/371472 [10:01:34<20:15:09, 3.37it/s] {'loss': 3.3179, 'learning_rate': 6.9485444633584e-07, 'epoch': 5.43} + 34%|███▍ | 126080/371472 [10:01:34<20:15:09, 3.37it/s] 34%|███▍ | 126081/371472 [10:01:34<19:52:13, 3.43it/s] 34%|███▍ | 126082/371472 [10:01:35<20:02:33, 3.40it/s] 34%|███▍ | 126083/371472 [10:01:35<19:24:59, 3.51it/s] 34%|███▍ | 126084/371472 [10:01:35<19:17:30, 3.53it/s] 34%|███▍ | 126085/371472 [10:01:36<18:41:58, 3.65it/s] 34%|███▍ | 126086/371472 [10:01:36<19:32:38, 3.49it/s] 34%|███▍ | 126087/371472 [10:01:36<18:50:08, 3.62it/s] 34%|███▍ | 126088/371472 [10:01:36<20:13:37, 3.37it/s] 34%|███▍ | 126089/371472 [10:01:37<19:47:18, 3.44it/s] 34%|███▍ | 126090/371472 [10:01:37<19:14:18, 3.54it/s] 34%|███▍ | 126091/371472 [10:01:37<19:27:26, 3.50it/s] 34%|███▍ | 126092/371472 [10:01:38<19:23:54, 3.51it/s] 34%|███▍ | 126093/371472 [10:01:38<20:01:59, 3.40it/s] 34%|███▍ | 126094/371472 [10:01:38<19:28:29, 3.50it/s] 34%|███▍ | 126095/371472 [10:01:38<19:25:38, 3.51it/s] 34%|███▍ | 126096/371472 [10:01:39<19:13:41, 3.54it/s] 34%|███▍ | 126097/371472 [10:01:39<19:21:45, 3.52it/s] 34%|███▍ | 126098/371472 [10:01:39<21:26:21, 3.18it/s] 34%|███▍ | 126099/371472 [10:01:40<20:51:11, 3.27it/s] 34%|███▍ | 126100/371472 [10:01:40<19:47:48, 3.44it/s] {'loss': 3.17, 'learning_rate': 6.948059643603611e-07, 'epoch': 5.43} + 34%|███▍ | 126100/371472 [10:01:40<19:47:48, 3.44it/s] 34%|███▍ | 126101/371472 [10:01:40<19:57:49, 3.41it/s] 34%|███▍ | 126102/371472 [10:01:41<19:21:31, 3.52it/s] 34%|███▍ | 126103/371472 [10:01:41<18:53:19, 3.61it/s] 34%|███▍ | 126104/371472 [10:01:41<18:49:37, 3.62it/s] 34%|███▍ | 126105/371472 [10:01:41<19:43:03, 3.46it/s] 34%|███▍ | 126106/371472 [10:01:42<20:23:39, 3.34it/s] 34%|███▍ | 126107/371472 [10:01:42<20:48:25, 3.28it/s] 34%|███▍ | 126108/371472 [10:01:42<20:28:31, 3.33it/s] 34%|███▍ | 126109/371472 [10:01:43<20:39:32, 3.30it/s] 34%|███▍ | 126110/371472 [10:01:43<20:00:35, 3.41it/s] 34%|███▍ | 126111/371472 [10:01:43<19:19:35, 3.53it/s] 34%|███▍ | 126112/371472 [10:01:43<19:15:29, 3.54it/s] 34%|███▍ | 126113/371472 [10:01:44<19:38:10, 3.47it/s] 34%|███▍ | 126114/371472 [10:01:44<21:53:43, 3.11it/s] 34%|███▍ | 126115/371472 [10:01:44<20:55:23, 3.26it/s] 34%|███▍ | 126116/371472 [10:01:45<20:07:22, 3.39it/s] 34%|███▍ | 126117/371472 [10:01:45<20:03:04, 3.40it/s] 34%|███▍ | 126118/371472 [10:01:45<20:52:03, 3.27it/s] 34%|███▍ | 126119/371472 [10:01:46<20:37:17, 3.30it/s] 34%|███▍ | 126120/371472 [10:01:46<19:30:34, 3.49it/s] {'loss': 3.083, 'learning_rate': 6.947574823848822e-07, 'epoch': 5.43} + 34%|███▍ | 126120/371472 [10:01:46<19:30:34, 3.49it/s] 34%|███▍ | 126121/371472 [10:01:46<19:28:28, 3.50it/s] 34%|███▍ | 126122/371472 [10:01:46<18:38:30, 3.66it/s] 34%|███▍ | 126123/371472 [10:01:47<19:18:55, 3.53it/s] 34%|███▍ | 126124/371472 [10:01:47<18:13:58, 3.74it/s] 34%|███▍ | 126125/371472 [10:01:47<18:30:26, 3.68it/s] 34%|███▍ | 126126/371472 [10:01:47<18:59:47, 3.59it/s] 34%|███▍ | 126127/371472 [10:01:48<20:32:27, 3.32it/s] 34%|███▍ | 126128/371472 [10:01:48<19:54:00, 3.42it/s] 34%|███▍ | 126129/371472 [10:01:48<21:24:31, 3.18it/s] 34%|███▍ | 126130/371472 [10:01:49<21:40:02, 3.15it/s] 34%|███▍ | 126131/371472 [10:01:49<21:30:30, 3.17it/s] 34%|███▍ | 126132/371472 [10:01:49<20:53:58, 3.26it/s] 34%|███▍ | 126133/371472 [10:01:50<21:00:00, 3.25it/s] 34%|███▍ | 126134/371472 [10:01:50<20:01:39, 3.40it/s] 34%|███▍ | 126135/371472 [10:01:50<19:43:52, 3.45it/s] 34%|███▍ | 126136/371472 [10:01:50<18:47:41, 3.63it/s] 34%|███▍ | 126137/371472 [10:01:51<18:24:04, 3.70it/s] 34%|███▍ | 126138/371472 [10:01:51<18:37:25, 3.66it/s] 34%|███▍ | 126139/371472 [10:01:51<18:53:37, 3.61it/s] 34%|███▍ | 126140/371472 [10:01:52<19:22:12, 3.52it/s] {'loss': 3.2787, 'learning_rate': 6.947090004094032e-07, 'epoch': 5.43} + 34%|███▍ | 126140/371472 [10:01:52<19:22:12, 3.52it/s] 34%|███▍ | 126141/371472 [10:01:52<19:40:44, 3.46it/s] 34%|███▍ | 126142/371472 [10:01:52<19:43:23, 3.46it/s] 34%|███▍ | 126143/371472 [10:01:53<21:17:24, 3.20it/s] 34%|███▍ | 126144/371472 [10:01:53<19:59:40, 3.41it/s] 34%|███▍ | 126145/371472 [10:01:53<19:09:30, 3.56it/s] 34%|███▍ | 126146/371472 [10:01:53<18:47:15, 3.63it/s] 34%|███▍ | 126147/371472 [10:01:54<20:32:24, 3.32it/s] 34%|███▍ | 126148/371472 [10:01:54<19:34:28, 3.48it/s] 34%|███▍ | 126149/371472 [10:01:54<20:54:35, 3.26it/s] 34%|███▍ | 126150/371472 [10:01:55<19:48:08, 3.44it/s] 34%|███▍ | 126151/371472 [10:01:55<18:57:26, 3.59it/s] 34%|███▍ | 126152/371472 [10:01:55<18:34:04, 3.67it/s] 34%|███▍ | 126153/371472 [10:01:55<20:38:33, 3.30it/s] 34%|███▍ | 126154/371472 [10:01:56<19:14:44, 3.54it/s] 34%|███▍ | 126155/371472 [10:01:56<18:12:54, 3.74it/s] 34%|███▍ | 126156/371472 [10:01:56<18:35:42, 3.66it/s] 34%|███▍ | 126157/371472 [10:01:56<19:17:58, 3.53it/s] 34%|███▍ | 126158/371472 [10:01:57<18:52:40, 3.61it/s] 34%|███▍ | 126159/371472 [10:01:57<18:14:17, 3.74it/s] 34%|███▍ | 126160/371472 [10:01:57<18:06:11, 3.76it/s] {'loss': 3.1984, 'learning_rate': 6.946605184339244e-07, 'epoch': 5.43} + 34%|███▍ | 126160/371472 [10:01:57<18:06:11, 3.76it/s] 34%|███▍ | 126161/371472 [10:01:58<19:48:20, 3.44it/s] 34%|███▍ | 126162/371472 [10:01:58<18:58:37, 3.59it/s] 34%|███▍ | 126163/371472 [10:01:58<19:35:08, 3.48it/s] 34%|███▍ | 126164/371472 [10:01:58<18:50:59, 3.61it/s] 34%|███▍ | 126165/371472 [10:01:59<17:52:06, 3.81it/s] 34%|███▍ | 126166/371472 [10:01:59<20:51:35, 3.27it/s] 34%|███▍ | 126167/371472 [10:01:59<22:04:38, 3.09it/s] 34%|███▍ | 126168/371472 [10:02:00<21:47:51, 3.13it/s] 34%|███▍ | 126169/371472 [10:02:00<20:32:46, 3.32it/s] 34%|███▍ | 126170/371472 [10:02:00<19:07:57, 3.56it/s] 34%|███▍ | 126171/371472 [10:02:00<18:55:31, 3.60it/s] 34%|███▍ | 126172/371472 [10:02:01<18:20:01, 3.72it/s] 34%|███▍ | 126173/371472 [10:02:01<18:40:33, 3.65it/s] 34%|███▍ | 126174/371472 [10:02:01<18:12:14, 3.74it/s] 34%|███▍ | 126175/371472 [10:02:02<18:12:49, 3.74it/s] 34%|███▍ | 126176/371472 [10:02:02<18:00:10, 3.78it/s] 34%|███▍ | 126177/371472 [10:02:02<17:42:31, 3.85it/s] 34%|███▍ | 126178/371472 [10:02:02<18:54:22, 3.60it/s] 34%|███▍ | 126179/371472 [10:02:03<18:26:45, 3.69it/s] 34%|███▍ | 126180/371472 [10:02:03<19:03:49, 3.57it/s] {'loss': 3.236, 'learning_rate': 6.946120364584456e-07, 'epoch': 5.43} + 34%|███▍ | 126180/371472 [10:02:03<19:03:49, 3.57it/s] 34%|███▍ | 126181/371472 [10:02:03<19:09:08, 3.56it/s] 34%|███▍ | 126182/371472 [10:02:03<18:19:24, 3.72it/s] 34%|███▍ | 126183/371472 [10:02:04<17:38:44, 3.86it/s] 34%|███▍ | 126184/371472 [10:02:04<19:57:33, 3.41it/s] 34%|███▍ | 126185/371472 [10:02:04<18:48:09, 3.62it/s] 34%|███▍ | 126186/371472 [10:02:05<18:49:35, 3.62it/s] 34%|███▍ | 126187/371472 [10:02:05<18:57:45, 3.59it/s] 34%|███▍ | 126188/371472 [10:02:05<18:30:03, 3.68it/s] 34%|███▍ | 126189/371472 [10:02:05<18:38:08, 3.66it/s] 34%|███▍ | 126190/371472 [10:02:06<22:16:00, 3.06it/s] 34%|███▍ | 126191/371472 [10:02:06<22:22:52, 3.04it/s] 34%|███▍ | 126192/371472 [10:02:06<20:42:14, 3.29it/s] 34%|███▍ | 126193/371472 [10:02:07<21:06:16, 3.23it/s] 34%|███▍ | 126194/371472 [10:02:07<19:47:30, 3.44it/s] 34%|███▍ | 126195/371472 [10:02:07<19:49:03, 3.44it/s] 34%|███▍ | 126196/371472 [10:02:08<19:53:36, 3.42it/s] 34%|███▍ | 126197/371472 [10:02:08<20:06:21, 3.39it/s] 34%|███▍ | 126198/371472 [10:02:08<19:37:33, 3.47it/s] 34%|███▍ | 126199/371472 [10:02:08<20:01:24, 3.40it/s] 34%|███▍ | 126200/371472 [10:02:09<19:43:44, 3.45it/s] {'loss': 3.1788, 'learning_rate': 6.945635544829667e-07, 'epoch': 5.44} + 34%|███▍ | 126200/371472 [10:02:09<19:43:44, 3.45it/s] 34%|███▍ | 126201/371472 [10:02:09<19:20:04, 3.52it/s] 34%|███▍ | 126202/371472 [10:02:09<20:06:47, 3.39it/s] 34%|███▍ | 126203/371472 [10:02:10<19:15:54, 3.54it/s] 34%|███▍ | 126204/371472 [10:02:10<19:42:00, 3.46it/s] 34%|███▍ | 126205/371472 [10:02:10<20:18:43, 3.35it/s] 34%|███▍ | 126206/371472 [10:02:11<20:51:08, 3.27it/s] 34%|███▍ | 126207/371472 [10:02:11<19:23:35, 3.51it/s] 34%|███▍ | 126208/371472 [10:02:11<18:43:05, 3.64it/s] 34%|███▍ | 126209/371472 [10:02:11<18:19:07, 3.72it/s] 34%|███▍ | 126210/371472 [10:02:12<18:21:51, 3.71it/s] 34%|███▍ | 126211/371472 [10:02:12<19:22:14, 3.52it/s] 34%|███▍ | 126212/371472 [10:02:12<18:37:34, 3.66it/s] 34%|███▍ | 126213/371472 [10:02:12<20:10:06, 3.38it/s] 34%|███▍ | 126214/371472 [10:02:13<21:55:25, 3.11it/s] 34%|███▍ | 126215/371472 [10:02:13<23:37:49, 2.88it/s] 34%|███▍ | 126216/371472 [10:02:13<21:28:11, 3.17it/s] 34%|███▍ | 126217/371472 [10:02:14<20:18:32, 3.35it/s] 34%|███▍ | 126218/371472 [10:02:14<19:13:58, 3.54it/s] 34%|███▍ | 126219/371472 [10:02:14<20:12:40, 3.37it/s] 34%|███▍ | 126220/371472 [10:02:15<19:45:31, 3.45it/s] {'loss': 3.2153, 'learning_rate': 6.945150725074877e-07, 'epoch': 5.44} + 34%|███▍ | 126220/371472 [10:02:15<19:45:31, 3.45it/s] 34%|███▍ | 126221/371472 [10:02:15<19:39:11, 3.47it/s] 34%|███▍ | 126222/371472 [10:02:15<19:16:50, 3.53it/s] 34%|███▍ | 126223/371472 [10:02:15<19:12:40, 3.55it/s] 34%|███▍ | 126224/371472 [10:02:16<19:23:00, 3.51it/s] 34%|███▍ | 126225/371472 [10:02:16<18:52:50, 3.61it/s] 34%|███▍ | 126226/371472 [10:02:16<19:10:01, 3.55it/s] 34%|███▍ | 126227/371472 [10:02:17<19:53:43, 3.42it/s] 34%|███▍ | 126228/371472 [10:02:17<20:01:42, 3.40it/s] 34%|███▍ | 126229/371472 [10:02:17<19:17:04, 3.53it/s] 34%|███▍ | 126230/371472 [10:02:18<22:56:05, 2.97it/s] 34%|███▍ | 126231/371472 [10:02:18<21:38:06, 3.15it/s] 34%|███▍ | 126232/371472 [10:02:18<21:57:15, 3.10it/s] 34%|███▍ | 126233/371472 [10:02:19<21:49:19, 3.12it/s] 34%|███▍ | 126234/371472 [10:02:19<21:02:52, 3.24it/s] 34%|███▍ | 126235/371472 [10:02:19<21:33:02, 3.16it/s] 34%|███▍ | 126236/371472 [10:02:19<20:25:54, 3.33it/s] 34%|███▍ | 126237/371472 [10:02:20<21:03:04, 3.24it/s] 34%|███▍ | 126238/371472 [10:02:20<20:05:17, 3.39it/s] 34%|███▍ | 126239/371472 [10:02:20<19:17:48, 3.53it/s] 34%|███▍ | 126240/371472 [10:02:20<18:32:19, 3.67it/s] {'loss': 3.2417, 'learning_rate': 6.944665905320088e-07, 'epoch': 5.44} + 34%|███▍ | 126240/371472 [10:02:20<18:32:19, 3.67it/s] 34%|███▍ | 126241/371472 [10:02:21<18:49:05, 3.62it/s] 34%|███▍ | 126242/371472 [10:02:21<18:55:57, 3.60it/s] 34%|███▍ | 126243/371472 [10:02:21<18:44:04, 3.64it/s] 34%|███▍ | 126244/371472 [10:02:22<19:14:48, 3.54it/s] 34%|███▍ | 126245/371472 [10:02:22<18:48:46, 3.62it/s] 34%|███▍ | 126246/371472 [10:02:22<18:18:54, 3.72it/s] 34%|███▍ | 126247/371472 [10:02:22<19:28:16, 3.50it/s] 34%|███▍ | 126248/371472 [10:02:23<18:55:29, 3.60it/s] 34%|███▍ | 126249/371472 [10:02:23<18:20:19, 3.71it/s] 34%|███▍ | 126250/371472 [10:02:23<18:09:39, 3.75it/s] 34%|███▍ | 126251/371472 [10:02:24<19:08:03, 3.56it/s] 34%|███▍ | 126252/371472 [10:02:24<19:30:36, 3.49it/s] 34%|███▍ | 126253/371472 [10:02:24<18:58:30, 3.59it/s] 34%|███▍ | 126254/371472 [10:02:24<18:26:12, 3.69it/s] 34%|███▍ | 126255/371472 [10:02:25<19:04:44, 3.57it/s] 34%|███▍ | 126256/371472 [10:02:25<18:08:33, 3.75it/s] 34%|███▍ | 126257/371472 [10:02:25<18:40:34, 3.65it/s] 34%|███▍ | 126258/371472 [10:02:25<18:23:20, 3.70it/s] 34%|███▍ | 126259/371472 [10:02:26<19:33:01, 3.48it/s] 34%|███▍ | 126260/371472 [10:02:26<18:46:12, 3.63it/s] {'loss': 3.1596, 'learning_rate': 6.9441810855653e-07, 'epoch': 5.44} + 34%|███▍ | 126260/371472 [10:02:26<18:46:12, 3.63it/s] 34%|███▍ | 126261/371472 [10:02:26<18:51:41, 3.61it/s] 34%|███▍ | 126262/371472 [10:02:27<18:38:02, 3.66it/s] 34%|███▍ | 126263/371472 [10:02:27<18:23:20, 3.70it/s] 34%|███▍ | 126264/371472 [10:02:27<18:52:54, 3.61it/s] 34%|███▍ | 126265/371472 [10:02:27<18:05:05, 3.77it/s] 34%|███▍ | 126266/371472 [10:02:28<19:01:39, 3.58it/s] 34%|███▍ | 126267/371472 [10:02:28<18:29:07, 3.68it/s] 34%|███▍ | 126268/371472 [10:02:28<18:31:56, 3.68it/s] 34%|███▍ | 126269/371472 [10:02:29<19:44:51, 3.45it/s] 34%|███▍ | 126270/371472 [10:02:29<21:11:28, 3.21it/s] 34%|███▍ | 126271/371472 [10:02:29<20:26:58, 3.33it/s] 34%|███▍ | 126272/371472 [10:02:29<20:50:18, 3.27it/s] 34%|███▍ | 126273/371472 [10:02:30<20:00:48, 3.40it/s] 34%|███▍ | 126274/371472 [10:02:30<19:53:23, 3.42it/s] 34%|███▍ | 126275/371472 [10:02:30<19:18:16, 3.53it/s] 34%|███▍ | 126276/371472 [10:02:31<19:02:53, 3.58it/s] 34%|███▍ | 126277/371472 [10:02:31<19:02:06, 3.58it/s] 34%|███▍ | 126278/371472 [10:02:31<18:55:56, 3.60it/s] 34%|███▍ | 126279/371472 [10:02:31<18:03:35, 3.77it/s] 34%|███▍ | 126280/371472 [10:02:32<17:19:22, 3.93it/s] {'loss': 3.2614, 'learning_rate': 6.943696265810511e-07, 'epoch': 5.44} + 34%|███▍ | 126280/371472 [10:02:32<17:19:22, 3.93it/s] 34%|███▍ | 126281/371472 [10:02:32<18:22:05, 3.71it/s] 34%|███▍ | 126282/371472 [10:02:32<17:58:39, 3.79it/s] 34%|███▍ | 126283/371472 [10:02:32<18:26:10, 3.69it/s] 34%|███▍ | 126284/371472 [10:02:33<17:51:48, 3.81it/s] 34%|███▍ | 126285/371472 [10:02:33<17:52:34, 3.81it/s] 34%|███▍ | 126286/371472 [10:02:33<18:46:37, 3.63it/s] 34%|███▍ | 126287/371472 [10:02:34<18:23:38, 3.70it/s] 34%|███▍ | 126288/371472 [10:02:34<17:56:21, 3.80it/s] 34%|███▍ | 126289/371472 [10:02:34<18:29:42, 3.68it/s] 34%|███▍ | 126290/371472 [10:02:34<19:19:46, 3.52it/s] 34%|███▍ | 126291/371472 [10:02:35<18:44:02, 3.64it/s] 34%|███▍ | 126292/371472 [10:02:35<20:01:20, 3.40it/s] 34%|███▍ | 126293/371472 [10:02:35<19:04:38, 3.57it/s] 34%|███▍ | 126294/371472 [10:02:36<21:09:59, 3.22it/s] 34%|███▍ | 126295/371472 [10:02:36<20:10:25, 3.38it/s] 34%|███▍ | 126296/371472 [10:02:36<19:06:10, 3.57it/s] 34%|███▍ | 126297/371472 [10:02:36<18:22:35, 3.71it/s] 34%|███▍ | 126298/371472 [10:02:37<17:58:43, 3.79it/s] 34%|███▍ | 126299/371472 [10:02:37<19:57:10, 3.41it/s] 34%|███▍ | 126300/371472 [10:02:37<19:26:02, 3.50it/s] {'loss': 3.1875, 'learning_rate': 6.943211446055721e-07, 'epoch': 5.44} + 34%|███▍ | 126300/371472 [10:02:37<19:26:02, 3.50it/s] 34%|███▍ | 126301/371472 [10:02:38<20:07:28, 3.38it/s] 34%|███▍ | 126302/371472 [10:02:38<19:04:36, 3.57it/s] 34%|███▍ | 126303/371472 [10:02:38<18:34:07, 3.67it/s] 34%|███▍ | 126304/371472 [10:02:38<18:31:39, 3.68it/s] 34%|███▍ | 126305/371472 [10:02:39<19:19:30, 3.52it/s] 34%|███▍ | 126306/371472 [10:02:39<18:48:18, 3.62it/s] 34%|███▍ | 126307/371472 [10:02:39<19:02:47, 3.58it/s] 34%|███▍ | 126308/371472 [10:02:39<19:48:18, 3.44it/s] 34%|███▍ | 126309/371472 [10:02:40<18:57:55, 3.59it/s] 34%|███▍ | 126310/371472 [10:02:40<18:01:05, 3.78it/s] 34%|███▍ | 126311/371472 [10:02:40<18:07:40, 3.76it/s] 34%|███▍ | 126312/371472 [10:02:41<18:55:30, 3.60it/s] 34%|███▍ | 126313/371472 [10:02:41<18:28:39, 3.69it/s] 34%|███▍ | 126314/371472 [10:02:41<18:34:04, 3.67it/s] 34%|███▍ | 126315/371472 [10:02:41<19:11:32, 3.55it/s] 34%|███▍ | 126316/371472 [10:02:42<18:47:37, 3.62it/s] 34%|███▍ | 126317/371472 [10:02:42<20:30:46, 3.32it/s] 34%|███▍ | 126318/371472 [10:02:42<19:48:28, 3.44it/s] 34%|███▍ | 126319/371472 [10:02:43<21:17:17, 3.20it/s] 34%|███▍ | 126320/371472 [10:02:43<20:37:50, 3.30it/s] {'loss': 3.26, 'learning_rate': 6.942726626300933e-07, 'epoch': 5.44} + 34%|███▍ | 126320/371472 [10:02:43<20:37:50, 3.30it/s] 34%|███▍ | 126321/371472 [10:02:43<20:12:18, 3.37it/s] 34%|███▍ | 126322/371472 [10:02:43<19:35:32, 3.48it/s] 34%|███▍ | 126323/371472 [10:02:44<21:19:00, 3.19it/s] 34%|███▍ | 126324/371472 [10:02:44<19:57:17, 3.41it/s] 34%|███▍ | 126325/371472 [10:02:44<18:55:05, 3.60it/s] 34%|███▍ | 126326/371472 [10:02:45<19:46:09, 3.44it/s] 34%|███▍ | 126327/371472 [10:02:45<19:49:09, 3.44it/s] 34%|███▍ | 126328/371472 [10:02:45<20:46:13, 3.28it/s] 34%|███▍ | 126329/371472 [10:02:46<21:52:19, 3.11it/s] 34%|███▍ | 126330/371472 [10:02:46<20:43:29, 3.29it/s] 34%|███▍ | 126331/371472 [10:02:46<20:12:44, 3.37it/s] 34%|███▍ | 126332/371472 [10:02:46<19:26:24, 3.50it/s] 34%|███▍ | 126333/371472 [10:02:47<18:35:03, 3.66it/s] 34%|███▍ | 126334/371472 [10:02:47<18:37:52, 3.65it/s] 34%|███▍ | 126335/371472 [10:02:47<18:29:56, 3.68it/s] 34%|███▍ | 126336/371472 [10:02:48<19:48:44, 3.44it/s] 34%|███▍ | 126337/371472 [10:02:48<19:05:54, 3.57it/s] 34%|███▍ | 126338/371472 [10:02:48<19:19:17, 3.52it/s] 34%|███▍ | 126339/371472 [10:02:48<19:42:29, 3.46it/s] 34%|███▍ | 126340/371472 [10:02:49<18:34:08, 3.67it/s] {'loss': 3.1076, 'learning_rate': 6.942241806546145e-07, 'epoch': 5.44} + 34%|███▍ | 126340/371472 [10:02:49<18:34:08, 3.67it/s] 34%|███▍ | 126341/371472 [10:02:49<18:00:35, 3.78it/s] 34%|███▍ | 126342/371472 [10:02:49<17:43:14, 3.84it/s] 34%|███▍ | 126343/371472 [10:02:49<18:00:40, 3.78it/s] 34%|███▍ | 126344/371472 [10:02:50<17:47:34, 3.83it/s] 34%|███▍ | 126345/371472 [10:02:50<19:21:32, 3.52it/s] 34%|███▍ | 126346/371472 [10:02:50<18:18:26, 3.72it/s] 34%|███▍ | 126347/371472 [10:02:51<18:42:44, 3.64it/s] 34%|███▍ | 126348/371472 [10:02:51<19:21:17, 3.52it/s] 34%|███▍ | 126349/371472 [10:02:51<19:17:32, 3.53it/s] 34%|███▍ | 126350/371472 [10:02:51<18:47:27, 3.62it/s] 34%|███▍ | 126351/371472 [10:02:52<18:22:18, 3.71it/s] 34%|███▍ | 126352/371472 [10:02:52<20:22:56, 3.34it/s] 34%|███▍ | 126353/371472 [10:02:52<20:24:49, 3.34it/s] 34%|███▍ | 126354/371472 [10:02:53<19:15:26, 3.54it/s] 34%|███▍ | 126355/371472 [10:02:53<20:34:56, 3.31it/s] 34%|███▍ | 126356/371472 [10:02:53<20:09:28, 3.38it/s] 34%|███▍ | 126357/371472 [10:02:53<20:24:22, 3.34it/s] 34%|███▍ | 126358/371472 [10:02:54<21:37:52, 3.15it/s] 34%|███▍ | 126359/371472 [10:02:54<21:57:17, 3.10it/s] 34%|███▍ | 126360/371472 [10:02:54<20:53:17, 3.26it/s] {'loss': 3.0644, 'learning_rate': 6.941756986791355e-07, 'epoch': 5.44} + 34%|███▍ | 126360/371472 [10:02:54<20:53:17, 3.26it/s] 34%|███▍ | 126361/371472 [10:02:55<20:28:46, 3.32it/s] 34%|███▍ | 126362/371472 [10:02:55<19:58:25, 3.41it/s] 34%|███▍ | 126363/371472 [10:02:55<19:26:39, 3.50it/s] 34%|███▍ | 126364/371472 [10:02:56<19:40:09, 3.46it/s] 34%|███▍ | 126365/371472 [10:02:56<19:19:23, 3.52it/s] 34%|███▍ | 126366/371472 [10:02:56<19:30:31, 3.49it/s] 34%|███▍ | 126367/371472 [10:02:56<19:14:22, 3.54it/s] 34%|███▍ | 126368/371472 [10:02:57<18:46:30, 3.63it/s] 34%|███▍ | 126369/371472 [10:02:57<18:00:04, 3.78it/s] 34%|███▍ | 126370/371472 [10:02:57<17:58:57, 3.79it/s] 34%|███▍ | 126371/371472 [10:02:57<17:39:38, 3.86it/s] 34%|███▍ | 126372/371472 [10:02:58<17:36:47, 3.87it/s] 34%|███▍ | 126373/371472 [10:02:58<17:33:39, 3.88it/s] 34%|███▍ | 126374/371472 [10:02:58<18:12:13, 3.74it/s] 34%|███▍ | 126375/371472 [10:02:59<19:30:29, 3.49it/s] 34%|███▍ | 126376/371472 [10:02:59<20:37:09, 3.30it/s] 34%|███▍ | 126377/371472 [10:02:59<19:23:33, 3.51it/s] 34%|███▍ | 126378/371472 [10:02:59<18:42:53, 3.64it/s] 34%|███▍ | 126379/371472 [10:03:00<18:44:51, 3.63it/s] 34%|███▍ | 126380/371472 [10:03:00<18:06:56, 3.76it/s] {'loss': 3.344, 'learning_rate': 6.941272167036565e-07, 'epoch': 5.44} + 34%|███▍ | 126380/371472 [10:03:00<18:06:56, 3.76it/s] 34%|███▍ | 126381/371472 [10:03:00<18:18:47, 3.72it/s] 34%|███▍ | 126382/371472 [10:03:00<17:38:29, 3.86it/s] 34%|███▍ | 126383/371472 [10:03:01<17:53:25, 3.81it/s] 34%|███▍ | 126384/371472 [10:03:01<17:29:02, 3.89it/s] 34%|███▍ | 126385/371472 [10:03:01<17:10:19, 3.96it/s] 34%|███▍ | 126386/371472 [10:03:01<17:41:53, 3.85it/s] 34%|███▍ | 126387/371472 [10:03:02<17:41:33, 3.85it/s] 34%|███▍ | 126388/371472 [10:03:02<17:30:42, 3.89it/s] 34%|███▍ | 126389/371472 [10:03:02<19:00:59, 3.58it/s] 34%|███▍ | 126390/371472 [10:03:03<18:24:30, 3.70it/s] 34%|███▍ | 126391/371472 [10:03:03<17:55:21, 3.80it/s] 34%|███▍ | 126392/371472 [10:03:03<18:26:01, 3.69it/s] 34%|███▍ | 126393/371472 [10:03:03<18:19:01, 3.72it/s] 34%|███▍ | 126394/371472 [10:03:04<17:54:17, 3.80it/s] 34%|███▍ | 126395/371472 [10:03:04<17:43:38, 3.84it/s] 34%|███▍ | 126396/371472 [10:03:04<17:30:04, 3.89it/s] 34%|███▍ | 126397/371472 [10:03:04<19:21:51, 3.52it/s] 34%|███▍ | 126398/371472 [10:03:05<19:36:48, 3.47it/s] 34%|███▍ | 126399/371472 [10:03:05<18:46:29, 3.63it/s] 34%|███▍ | 126400/371472 [10:03:05<18:39:24, 3.65it/s] {'loss': 3.4049, 'learning_rate': 6.940787347281777e-07, 'epoch': 5.44} + 34%|███▍ | 126400/371472 [10:03:05<18:39:24, 3.65it/s] 34%|███▍ | 126401/371472 [10:03:06<19:19:35, 3.52it/s] 34%|███▍ | 126402/371472 [10:03:06<19:03:32, 3.57it/s] 34%|███▍ | 126403/371472 [10:03:06<21:13:39, 3.21it/s] 34%|███▍ | 126404/371472 [10:03:06<20:10:22, 3.37it/s] 34%|███▍ | 126405/371472 [10:03:07<20:02:16, 3.40it/s] 34%|███▍ | 126406/371472 [10:03:07<20:33:42, 3.31it/s] 34%|███▍ | 126407/371472 [10:03:07<19:31:52, 3.49it/s] 34%|███▍ | 126408/371472 [10:03:08<18:47:47, 3.62it/s] 34%|███▍ | 126409/371472 [10:03:08<20:13:28, 3.37it/s] 34%|███▍ | 126410/371472 [10:03:08<19:51:29, 3.43it/s] 34%|███▍ | 126411/371472 [10:03:08<19:36:36, 3.47it/s] 34%|███▍ | 126412/371472 [10:03:09<19:55:57, 3.42it/s] 34%|███▍ | 126413/371472 [10:03:09<19:16:40, 3.53it/s] 34%|███▍ | 126414/371472 [10:03:09<20:25:57, 3.33it/s] 34%|███▍ | 126415/371472 [10:03:10<19:38:48, 3.46it/s] 34%|█���█▍ | 126416/371472 [10:03:10<19:33:02, 3.48it/s] 34%|███▍ | 126417/371472 [10:03:10<19:16:38, 3.53it/s] 34%|███▍ | 126418/371472 [10:03:10<18:32:16, 3.67it/s] 34%|███▍ | 126419/371472 [10:03:11<18:38:54, 3.65it/s] 34%|███▍ | 126420/371472 [10:03:11<17:52:55, 3.81it/s] {'loss': 3.2218, 'learning_rate': 6.940302527526989e-07, 'epoch': 5.45} + 34%|███▍ | 126420/371472 [10:03:11<17:52:55, 3.81it/s] 34%|███▍ | 126421/371472 [10:03:11<20:13:47, 3.36it/s] 34%|███▍ | 126422/371472 [10:03:12<19:24:14, 3.51it/s] 34%|███▍ | 126423/371472 [10:03:12<20:05:51, 3.39it/s] 34%|███▍ | 126424/371472 [10:03:12<19:06:49, 3.56it/s] 34%|███▍ | 126425/371472 [10:03:12<18:46:20, 3.63it/s] 34%|███▍ | 126426/371472 [10:03:13<18:42:29, 3.64it/s] 34%|███▍ | 126427/371472 [10:03:13<18:26:14, 3.69it/s] 34%|███▍ | 126428/371472 [10:03:13<19:16:00, 3.53it/s] 34%|███▍ | 126429/371472 [10:03:14<19:04:50, 3.57it/s] 34%|███▍ | 126430/371472 [10:03:14<19:39:56, 3.46it/s] 34%|███▍ | 126431/371472 [10:03:14<19:06:15, 3.56it/s] 34%|███▍ | 126432/371472 [10:03:14<19:41:37, 3.46it/s] 34%|███▍ | 126433/371472 [10:03:15<19:47:32, 3.44it/s] 34%|███▍ | 126434/371472 [10:03:15<19:10:58, 3.55it/s] 34%|███▍ | 126435/371472 [10:03:15<18:51:00, 3.61it/s] 34%|███▍ | 126436/371472 [10:03:15<18:15:36, 3.73it/s] 34%|███▍ | 126437/371472 [10:03:16<20:25:44, 3.33it/s] 34%|███▍ | 126438/371472 [10:03:16<19:47:36, 3.44it/s] 34%|███▍ | 126439/371472 [10:03:16<19:26:52, 3.50it/s] 34%|███▍ | 126440/371472 [10:03:17<19:30:07, 3.49it/s] {'loss': 3.1928, 'learning_rate': 6.9398177077722e-07, 'epoch': 5.45} + 34%|███▍ | 126440/371472 [10:03:17<19:30:07, 3.49it/s] 34%|███▍ | 126441/371472 [10:03:17<19:09:22, 3.55it/s] 34%|███▍ | 126442/371472 [10:03:17<18:50:01, 3.61it/s] 34%|███▍ | 126443/371472 [10:03:18<19:09:51, 3.55it/s] 34%|███▍ | 126444/371472 [10:03:18<19:27:30, 3.50it/s] 34%|███▍ | 126445/371472 [10:03:18<19:09:24, 3.55it/s] 34%|███▍ | 126446/371472 [10:03:18<20:00:21, 3.40it/s] 34%|███▍ | 126447/371472 [10:03:19<19:39:49, 3.46it/s] 34%|███▍ | 126448/371472 [10:03:19<19:02:35, 3.57it/s] 34%|███▍ | 126449/371472 [10:03:19<19:50:38, 3.43it/s] 34%|███▍ | 126450/371472 [10:03:20<19:38:16, 3.47it/s] 34%|███▍ | 126451/371472 [10:03:20<20:37:32, 3.30it/s] 34%|███▍ | 126452/371472 [10:03:20<20:18:45, 3.35it/s] 34%|███▍ | 126453/371472 [10:03:21<21:32:54, 3.16it/s] 34%|███▍ | 126454/371472 [10:03:21<20:32:59, 3.31it/s] 34%|███▍ | 126455/371472 [10:03:21<22:02:00, 3.09it/s] 34%|███▍ | 126456/371472 [10:03:21<21:40:41, 3.14it/s] 34%|███▍ | 126457/371472 [10:03:22<20:45:10, 3.28it/s] 34%|███▍ | 126458/371472 [10:03:22<19:53:10, 3.42it/s] 34%|███▍ | 126459/371472 [10:03:22<20:25:56, 3.33it/s] 34%|███▍ | 126460/371472 [10:03:23<20:31:37, 3.32it/s] {'loss': 3.2087, 'learning_rate': 6.93933288801741e-07, 'epoch': 5.45} + 34%|███▍ | 126460/371472 [10:03:23<20:31:37, 3.32it/s] 34%|███▍ | 126461/371472 [10:03:23<19:43:02, 3.45it/s] 34%|███▍ | 126462/371472 [10:03:23<19:04:48, 3.57it/s] 34%|███▍ | 126463/371472 [10:03:23<19:20:41, 3.52it/s] 34%|███▍ | 126464/371472 [10:03:24<18:34:21, 3.66it/s] 34%|███▍ | 126465/371472 [10:03:24<18:05:34, 3.76it/s] 34%|███▍ | 126466/371472 [10:03:24<19:37:56, 3.47it/s] 34%|███▍ | 126467/371472 [10:03:25<19:14:47, 3.54it/s] 34%|███▍ | 126468/371472 [10:03:25<19:20:34, 3.52it/s] 34%|███▍ | 126469/371472 [10:03:25<19:13:14, 3.54it/s] 34%|███▍ | 126470/371472 [10:03:25<18:33:03, 3.67it/s] 34%|███▍ | 126471/371472 [10:03:26<19:11:04, 3.55it/s] 34%|███▍ | 126472/371472 [10:03:26<18:45:44, 3.63it/s] 34%|███▍ | 126473/371472 [10:03:26<18:18:09, 3.72it/s] 34%|███▍ | 126474/371472 [10:03:26<17:57:48, 3.79it/s] 34%|███▍ | 126475/371472 [10:03:27<18:58:46, 3.59it/s] 34%|███▍ | 126476/371472 [10:03:27<18:53:43, 3.60it/s] 34%|███▍ | 126477/371472 [10:03:27<19:28:44, 3.49it/s] 34%|███▍ | 126478/371472 [10:03:28<19:42:53, 3.45it/s] 34%|███▍ | 126479/371472 [10:03:28<20:06:22, 3.38it/s] 34%|███▍ | 126480/371472 [10:03:28<21:21:16, 3.19it/s] {'loss': 3.2636, 'learning_rate': 6.938848068262621e-07, 'epoch': 5.45} + 34%|███▍ | 126480/371472 [10:03:28<21:21:16, 3.19it/s] 34%|███▍ | 126481/371472 [10:03:29<19:51:33, 3.43it/s] 34%|███▍ | 126482/371472 [10:03:29<19:44:35, 3.45it/s] 34%|███▍ | 126483/371472 [10:03:29<19:19:31, 3.52it/s] 34%|███▍ | 126484/371472 [10:03:29<18:46:18, 3.63it/s] 34%|███▍ | 126485/371472 [10:03:30<18:28:01, 3.69it/s] 34%|███▍ | 126486/371472 [10:03:30<18:27:48, 3.69it/s] 34%|███▍ | 126487/371472 [10:03:30<18:15:14, 3.73it/s] 34%|███▍ | 126488/371472 [10:03:30<17:48:28, 3.82it/s] 34%|███▍ | 126489/371472 [10:03:31<17:38:23, 3.86it/s] 34%|███▍ | 126490/371472 [10:03:31<18:59:38, 3.58it/s] 34%|███▍ | 126491/371472 [10:03:31<18:40:15, 3.64it/s] 34%|███▍ | 126492/371472 [10:03:32<19:17:01, 3.53it/s] 34%|███▍ | 126493/371472 [10:03:32<19:16:21, 3.53it/s] 34%|███▍ | 126494/371472 [10:03:32<18:48:55, 3.62it/s] 34%|███▍ | 126495/371472 [10:03:32<18:24:01, 3.70it/s] 34%|███▍ | 126496/371472 [10:03:33<18:38:54, 3.65it/s] 34%|███▍ | 126497/371472 [10:03:33<19:01:23, 3.58it/s] 34%|███▍ | 126498/371472 [10:03:33<18:58:10, 3.59it/s] 34%|███▍ | 126499/371472 [10:03:33<19:15:08, 3.53it/s] 34%|███▍ | 126500/371472 [10:03:34<19:44:58, 3.45it/s] {'loss': 3.1491, 'learning_rate': 6.938363248507833e-07, 'epoch': 5.45} + 34%|███▍ | 126500/371472 [10:03:34<19:44:58, 3.45it/s] 34%|███▍ | 126501/371472 [10:03:34<19:06:24, 3.56it/s] 34%|███▍ | 126502/371472 [10:03:34<19:19:05, 3.52it/s] 34%|███▍ | 126503/371472 [10:03:35<18:46:52, 3.62it/s] 34%|███▍ | 126504/371472 [10:03:35<18:06:36, 3.76it/s] 34%|███▍ | 126505/371472 [10:03:35<18:31:11, 3.67it/s] 34%|███▍ | 126506/371472 [10:03:35<17:47:15, 3.83it/s] 34%|███▍ | 126507/371472 [10:03:36<18:28:26, 3.68it/s] 34%|███▍ | 126508/371472 [10:03:36<18:26:28, 3.69it/s] 34%|███▍ | 126509/371472 [10:03:36<19:35:54, 3.47it/s] 34%|███▍ | 126510/371472 [10:03:37<18:58:31, 3.59it/s] 34%|███▍ | 126511/371472 [10:03:37<19:11:43, 3.54it/s] 34%|███▍ | 126512/371472 [10:03:37<19:16:02, 3.53it/s] 34%|███▍ | 126513/371472 [10:03:37<19:54:56, 3.42it/s] 34%|███▍ | 126514/371472 [10:03:38<19:48:33, 3.43it/s] 34%|███▍ | 126515/371472 [10:03:38<20:32:40, 3.31it/s] 34%|███▍ | 126516/371472 [10:03:38<21:07:01, 3.22it/s] 34%|███▍ | 126517/371472 [10:03:39<20:52:47, 3.26it/s] 34%|███▍ | 126518/371472 [10:03:39<21:07:27, 3.22it/s] 34%|███▍ | 126519/371472 [10:03:39<20:05:02, 3.39it/s] 34%|███▍ | 126520/371472 [10:03:40<21:18:51, 3.19it/s] {'loss': 3.0597, 'learning_rate': 6.937878428753043e-07, 'epoch': 5.45} + 34%|███▍ | 126520/371472 [10:03:40<21:18:51, 3.19it/s] 34%|███▍ | 126521/371472 [10:03:40<21:57:31, 3.10it/s] 34%|███▍ | 126522/371472 [10:03:40<21:20:58, 3.19it/s] 34%|███▍ | 126523/371472 [10:03:41<21:04:43, 3.23it/s] 34%|███▍ | 126524/371472 [10:03:41<22:40:07, 3.00it/s] 34%|███▍ | 126525/371472 [10:03:41<20:49:37, 3.27it/s] 34%|███▍ | 126526/371472 [10:03:41<20:49:31, 3.27it/s] 34%|███▍ | 126527/371472 [10:03:42<19:53:07, 3.42it/s] 34%|███▍ | 126528/371472 [10:03:42<19:01:00, 3.58it/s] 34%|███▍ | 126529/371472 [10:03:42<21:15:31, 3.20it/s] 34%|███▍ | 126530/371472 [10:03:43<20:58:19, 3.24it/s] 34%|███▍ | 126531/371472 [10:03:43<19:51:06, 3.43it/s] 34%|███▍ | 126532/371472 [10:03:43<19:39:49, 3.46it/s] 34%|███▍ | 126533/371472 [10:03:43<19:19:08, 3.52it/s] 34%|███▍ | 126534/371472 [10:03:44<19:25:49, 3.50it/s] 34%|███▍ | 126535/371472 [10:03:44<19:55:40, 3.41it/s] 34%|███▍ | 126536/371472 [10:03:44<19:50:44, 3.43it/s] 34%|███▍ | 126537/371472 [10:03:45<18:49:00, 3.62it/s] 34%|███▍ | 126538/371472 [10:03:45<18:45:09, 3.63it/s] 34%|███▍ | 126539/371472 [10:03:45<18:58:47, 3.58it/s] 34%|███▍ | 126540/371472 [10:03:45<19:37:50, 3.47it/s] {'loss': 3.3135, 'learning_rate': 6.937393608998254e-07, 'epoch': 5.45} + 34%|███▍ | 126540/371472 [10:03:45<19:37:50, 3.47it/s] 34%|███▍ | 126541/371472 [10:03:46<20:21:35, 3.34it/s] 34%|███▍ | 126542/371472 [10:03:46<19:53:57, 3.42it/s] 34%|███▍ | 126543/371472 [10:03:46<19:04:46, 3.57it/s] 34%|███▍ | 126544/371472 [10:03:47<18:28:23, 3.68it/s] 34%|███▍ | 126545/371472 [10:03:47<18:39:30, 3.65it/s] 34%|███▍ | 126546/371472 [10:03:47<18:30:57, 3.67it/s] 34%|███▍ | 126547/371472 [10:03:47<19:18:16, 3.52it/s] 34%|███▍ | 126548/371472 [10:03:48<21:10:09, 3.21it/s] 34%|███▍ | 126549/371472 [10:03:48<20:45:34, 3.28it/s] 34%|███▍ | 126550/371472 [10:03:48<20:50:49, 3.26it/s] 34%|███▍ | 126551/371472 [10:03:49<20:25:48, 3.33it/s] 34%|███▍ | 126552/371472 [10:03:49<19:56:20, 3.41it/s] 34%|███▍ | 126553/371472 [10:03:49<19:38:19, 3.46it/s] 34%|███▍ | 126554/371472 [10:03:49<18:41:18, 3.64it/s] 34%|███▍ | 126555/371472 [10:03:50<19:13:15, 3.54it/s] 34%|███▍ | 126556/371472 [10:03:50<21:46:54, 3.12it/s] 34%|███▍ | 126557/371472 [10:03:51<21:15:59, 3.20it/s] 34%|███▍ | 126558/371472 [10:03:51<20:16:09, 3.36it/s] 34%|███▍ | 126559/371472 [10:03:51<20:35:09, 3.30it/s] 34%|███▍ | 126560/371472 [10:03:51<19:45:17, 3.44it/s] {'loss': 3.2245, 'learning_rate': 6.936908789243466e-07, 'epoch': 5.45} + 34%|███▍ | 126560/371472 [10:03:51<19:45:17, 3.44it/s] 34%|███▍ | 126561/371472 [10:03:52<19:59:01, 3.40it/s] 34%|███▍ | 126562/371472 [10:03:52<20:18:34, 3.35it/s] 34%|███▍ | 126563/371472 [10:03:52<19:43:16, 3.45it/s] 34%|███▍ | 126564/371472 [10:03:53<19:48:31, 3.43it/s] 34%|███▍ | 126565/371472 [10:03:53<19:00:01, 3.58it/s] 34%|███▍ | 126566/371472 [10:03:53<18:38:33, 3.65it/s] 34%|███▍ | 126567/371472 [10:03:53<18:28:45, 3.68it/s] 34%|███▍ | 126568/371472 [10:03:54<18:02:19, 3.77it/s] 34%|███▍ | 126569/371472 [10:03:54<17:33:49, 3.87it/s] 34%|███▍ | 126570/371472 [10:03:54<20:45:45, 3.28it/s] 34%|███▍ | 126571/371472 [10:03:54<19:27:38, 3.50it/s] 34%|███▍ | 126572/371472 [10:03:55<18:56:56, 3.59it/s] 34%|███▍ | 126573/371472 [10:03:55<18:16:08, 3.72it/s] 34%|███▍ | 126574/371472 [10:03:55<18:58:20, 3.59it/s] 34%|███▍ | 126575/371472 [10:03:56<18:36:37, 3.66it/s] 34%|███▍ | 126576/371472 [10:03:56<18:19:43, 3.71it/s] 34%|███▍ | 126577/371472 [10:03:56<18:05:09, 3.76it/s] 34%|███▍ | 126578/371472 [10:03:56<18:34:17, 3.66it/s] 34%|███▍ | 126579/371472 [10:03:57<19:24:53, 3.50it/s] 34%|███▍ | 126580/371472 [10:03:57<18:52:19, 3.60it/s] {'loss': 3.3457, 'learning_rate': 6.936423969488677e-07, 'epoch': 5.45} + 34%|███▍ | 126580/371472 [10:03:57<18:52:19, 3.60it/s] 34%|███▍ | 126581/371472 [10:03:57<19:10:47, 3.55it/s] 34%|███▍ | 126582/371472 [10:03:57<18:54:33, 3.60it/s] 34%|███▍ | 126583/371472 [10:03:58<18:42:55, 3.63it/s] 34%|███▍ | 126584/371472 [10:03:58<18:26:53, 3.69it/s] 34%|███▍ | 126585/371472 [10:03:58<19:02:25, 3.57it/s] 34%|███▍ | 126586/371472 [10:03:59<19:21:44, 3.51it/s] 34%|███▍ | 126587/371472 [10:03:59<20:40:48, 3.29it/s] 34%|███▍ | 126588/371472 [10:03:59<20:11:41, 3.37it/s] 34%|███▍ | 126589/371472 [10:03:59<19:23:17, 3.51it/s] 34%|███▍ | 126590/371472 [10:04:00<19:09:29, 3.55it/s] 34%|███▍ | 126591/371472 [10:04:00<19:25:00, 3.50it/s] 34%|███▍ | 126592/371472 [10:04:00<19:11:20, 3.54it/s] 34%|███▍ | 126593/371472 [10:04:01<19:59:02, 3.40it/s] 34%|███▍ | 126594/371472 [10:04:01<19:24:49, 3.50it/s] 34%|███▍ | 126595/371472 [10:04:01<19:05:13, 3.56it/s] 34%|███▍ | 126596/371472 [10:04:01<18:44:55, 3.63it/s] 34%|███▍ | 126597/371472 [10:04:02<19:09:49, 3.55it/s] 34%|███▍ | 126598/371472 [10:04:02<20:21:15, 3.34it/s] 34%|███▍ | 126599/371472 [10:04:02<20:09:48, 3.37it/s] 34%|███▍ | 126600/371472 [10:04:03<20:05:51, 3.38it/s] {'loss': 3.3086, 'learning_rate': 6.935939149733887e-07, 'epoch': 5.45} + 34%|███▍ | 126600/371472 [10:04:03<20:05:51, 3.38it/s] 34%|███▍ | 126601/371472 [10:04:03<19:32:32, 3.48it/s] 34%|███▍ | 126602/371472 [10:04:03<18:52:11, 3.60it/s] 34%|███▍ | 126603/371472 [10:04:03<18:36:09, 3.66it/s] 34%|███▍ | 126604/371472 [10:04:04<18:54:22, 3.60it/s] 34%|███▍ | 126605/371472 [10:04:04<18:31:10, 3.67it/s] 34%|██���▍ | 126606/371472 [10:04:04<18:36:46, 3.65it/s] 34%|███▍ | 126607/371472 [10:04:05<20:39:56, 3.29it/s] 34%|███▍ | 126608/371472 [10:04:05<19:37:17, 3.47it/s] 34%|███▍ | 126609/371472 [10:04:05<19:10:02, 3.55it/s] 34%|███▍ | 126610/371472 [10:04:05<19:04:43, 3.57it/s] 34%|███▍ | 126611/371472 [10:04:06<18:43:20, 3.63it/s] 34%|███▍ | 126612/371472 [10:04:06<22:06:47, 3.08it/s] 34%|███▍ | 126613/371472 [10:04:06<21:42:39, 3.13it/s] 34%|███▍ | 126614/371472 [10:04:07<22:01:36, 3.09it/s] 34%|███▍ | 126615/371472 [10:04:07<21:08:12, 3.22it/s] 34%|███▍ | 126616/371472 [10:04:07<20:30:17, 3.32it/s] 34%|███▍ | 126617/371472 [10:04:08<19:24:25, 3.50it/s] 34%|███▍ | 126618/371472 [10:04:08<19:11:29, 3.54it/s] 34%|███▍ | 126619/371472 [10:04:08<19:54:37, 3.42it/s] 34%|███▍ | 126620/371472 [10:04:09<21:33:57, 3.15it/s] {'loss': 3.1835, 'learning_rate': 6.935454329979098e-07, 'epoch': 5.45} + 34%|███▍ | 126620/371472 [10:04:09<21:33:57, 3.15it/s] 34%|███▍ | 126621/371472 [10:04:09<22:03:49, 3.08it/s] 34%|███▍ | 126622/371472 [10:04:09<20:47:00, 3.27it/s] 34%|███▍ | 126623/371472 [10:04:09<19:59:07, 3.40it/s] 34%|███▍ | 126624/371472 [10:04:10<19:27:52, 3.49it/s] 34%|███▍ | 126625/371472 [10:04:10<18:48:35, 3.62it/s] 34%|███▍ | 126626/371472 [10:04:10<19:07:32, 3.56it/s] 34%|███▍ | 126627/371472 [10:04:11<20:05:20, 3.39it/s] 34%|███▍ | 126628/371472 [10:04:11<19:35:30, 3.47it/s] 34%|███▍ | 126629/371472 [10:04:11<18:49:06, 3.61it/s] 34%|███▍ | 126630/371472 [10:04:11<18:41:14, 3.64it/s] 34%|███▍ | 126631/371472 [10:04:12<19:22:14, 3.51it/s] 34%|███▍ | 126632/371472 [10:04:12<18:52:05, 3.60it/s] 34%|███▍ | 126633/371472 [10:04:12<18:00:47, 3.78it/s] 34%|███▍ | 126634/371472 [10:04:12<19:07:49, 3.56it/s] 34%|███▍ | 126635/371472 [10:04:13<18:28:31, 3.68it/s] 34%|███▍ | 126636/371472 [10:04:13<20:10:32, 3.37it/s] 34%|███▍ | 126637/371472 [10:04:13<20:44:32, 3.28it/s] 34%|███▍ | 126638/371472 [10:04:14<20:12:31, 3.37it/s] 34%|███▍ | 126639/371472 [10:04:14<21:06:50, 3.22it/s] 34%|███▍ | 126640/371472 [10:04:14<21:27:43, 3.17it/s] {'loss': 3.3209, 'learning_rate': 6.93496951022431e-07, 'epoch': 5.45} + 34%|███▍ | 126640/371472 [10:04:14<21:27:43, 3.17it/s] 34%|███▍ | 126641/371472 [10:04:15<20:35:14, 3.30it/s] 34%|███▍ | 126642/371472 [10:04:15<19:41:16, 3.45it/s] 34%|███▍ | 126643/371472 [10:04:15<20:53:50, 3.25it/s] 34%|███▍ | 126644/371472 [10:04:16<20:16:04, 3.36it/s] 34%|███▍ | 126645/371472 [10:04:16<19:16:54, 3.53it/s] 34%|███▍ | 126646/371472 [10:04:16<19:17:28, 3.53it/s] 34%|███▍ | 126647/371472 [10:04:16<18:53:42, 3.60it/s] 34%|███▍ | 126648/371472 [10:04:17<18:09:43, 3.74it/s] 34%|███▍ | 126649/371472 [10:04:17<18:15:59, 3.72it/s] 34%|███▍ | 126650/371472 [10:04:17<18:16:01, 3.72it/s] 34%|███▍ | 126651/371472 [10:04:17<18:07:40, 3.75it/s] 34%|███▍ | 126652/371472 [10:04:18<19:11:21, 3.54it/s] 34%|███▍ | 126653/371472 [10:04:18<18:49:58, 3.61it/s] 34%|███▍ | 126654/371472 [10:04:18<18:21:39, 3.70it/s] 34%|███▍ | 126655/371472 [10:04:18<18:05:26, 3.76it/s] 34%|███▍ | 126656/371472 [10:04:19<17:57:06, 3.79it/s] 34%|███▍ | 126657/371472 [10:04:19<18:27:22, 3.68it/s] 34%|███▍ | 126658/371472 [10:04:19<18:57:49, 3.59it/s] 34%|███▍ | 126659/371472 [10:04:20<19:02:01, 3.57it/s] 34%|███▍ | 126660/371472 [10:04:20<19:04:49, 3.56it/s] {'loss': 3.1743, 'learning_rate': 6.93448469046952e-07, 'epoch': 5.46} + 34%|███▍ | 126660/371472 [10:04:20<19:04:49, 3.56it/s] 34%|███▍ | 126661/371472 [10:04:20<18:53:56, 3.60it/s] 34%|███▍ | 126662/371472 [10:04:20<18:32:24, 3.67it/s] 34%|███▍ | 126663/371472 [10:04:21<18:43:58, 3.63it/s] 34%|███▍ | 126664/371472 [10:04:21<18:43:59, 3.63it/s] 34%|███▍ | 126665/371472 [10:04:21<18:54:28, 3.60it/s] 34%|███▍ | 126666/371472 [10:04:22<18:43:49, 3.63it/s] 34%|███▍ | 126667/371472 [10:04:22<19:24:00, 3.51it/s] 34%|███▍ | 126668/371472 [10:04:22<18:41:31, 3.64it/s] 34%|███▍ | 126669/371472 [10:04:22<19:28:54, 3.49it/s] 34%|███▍ | 126670/371472 [10:04:23<19:10:42, 3.55it/s] 34%|███▍ | 126671/371472 [10:04:23<18:33:18, 3.66it/s] 34%|███▍ | 126672/371472 [10:04:23<18:41:46, 3.64it/s] 34%|███▍ | 126673/371472 [10:04:23<19:04:44, 3.56it/s] 34%|███▍ | 126674/371472 [10:04:24<19:18:55, 3.52it/s] 34%|███▍ | 126675/371472 [10:04:24<18:29:26, 3.68it/s] 34%|███▍ | 126676/371472 [10:04:24<18:32:58, 3.67it/s] 34%|███▍ | 126677/371472 [10:04:25<18:30:46, 3.67it/s] 34%|███▍ | 126678/371472 [10:04:25<17:52:41, 3.80it/s] 34%|███▍ | 126679/371472 [10:04:25<18:21:29, 3.70it/s] 34%|███▍ | 126680/371472 [10:04:25<19:50:43, 3.43it/s] {'loss': 3.4087, 'learning_rate': 6.93399987071473e-07, 'epoch': 5.46} + 34%|███▍ | 126680/371472 [10:04:25<19:50:43, 3.43it/s] 34%|███▍ | 126681/371472 [10:04:26<19:22:05, 3.51it/s] 34%|███▍ | 126682/371472 [10:04:26<19:37:24, 3.47it/s] 34%|███▍ | 126683/371472 [10:04:26<19:51:03, 3.43it/s] 34%|███▍ | 126684/371472 [10:04:27<19:19:43, 3.52it/s] 34%|███▍ | 126685/371472 [10:04:27<20:02:29, 3.39it/s] 34%|███▍ | 126686/371472 [10:04:27<19:36:12, 3.47it/s] 34%|███▍ | 126687/371472 [10:04:27<20:00:03, 3.40it/s] 34%|███▍ | 126688/371472 [10:04:28<19:31:30, 3.48it/s] 34%|███▍ | 126689/371472 [10:04:28<20:07:54, 3.38it/s] 34%|███▍ | 126690/371472 [10:04:28<19:33:01, 3.48it/s] 34%|███▍ | 126691/371472 [10:04:29<18:45:42, 3.62it/s] 34%|███▍ | 126692/371472 [10:04:29<19:53:30, 3.42it/s] 34%|███▍ | 126693/371472 [10:04:29<19:00:14, 3.58it/s] 34%|███▍ | 126694/371472 [10:04:29<20:05:08, 3.39it/s] 34%|███▍ | 126695/371472 [10:04:30<20:28:33, 3.32it/s] 34%|███▍ | 126696/371472 [10:04:30<21:51:36, 3.11it/s] 34%|███▍ | 126697/371472 [10:04:30<20:42:33, 3.28it/s] 34%|███▍ | 126698/371472 [10:04:31<20:08:15, 3.38it/s] 34%|███▍ | 126699/371472 [10:04:31<20:17:32, 3.35it/s] 34%|███▍ | 126700/371472 [10:04:31<19:45:12, 3.44it/s] {'loss': 3.1188, 'learning_rate': 6.933515050959943e-07, 'epoch': 5.46} + 34%|███▍ | 126700/371472 [10:04:31<19:45:12, 3.44it/s] 34%|███▍ | 126701/371472 [10:04:32<19:09:11, 3.55it/s] 34%|███▍ | 126702/371472 [10:04:32<18:36:15, 3.65it/s] 34%|███▍ | 126703/371472 [10:04:32<18:15:12, 3.72it/s] 34%|███▍ | 126704/371472 [10:04:32<19:36:24, 3.47it/s] 34%|███▍ | 126705/371472 [10:04:33<18:57:48, 3.59it/s] 34%|███▍ | 126706/371472 [10:04:33<19:42:10, 3.45it/s] 34%|███▍ | 126707/371472 [10:04:33<19:09:06, 3.55it/s] 34%|███▍ | 126708/371472 [10:04:34<19:40:31, 3.46it/s] 34%|███▍ | 126709/371472 [10:04:34<20:37:29, 3.30it/s] 34%|███▍ | 126710/371472 [10:04:34<21:03:17, 3.23it/s] 34%|███▍ | 126711/371472 [10:04:34<20:14:33, 3.36it/s] 34%|███▍ | 126712/371472 [10:04:35<20:16:07, 3.35it/s] 34%|███▍ | 126713/371472 [10:04:35<20:30:00, 3.32it/s] 34%|███▍ | 126714/371472 [10:04:35<19:24:47, 3.50it/s] 34%|███▍ | 126715/371472 [10:04:36<18:48:05, 3.62it/s] 34%|███▍ | 126716/371472 [10:04:36<18:09:48, 3.74it/s] 34%|███▍ | 126717/371472 [10:04:36<18:56:54, 3.59it/s] 34%|███▍ | 126718/371472 [10:04:36<18:39:31, 3.64it/s] 34%|███▍ | 126719/371472 [10:04:37<19:10:52, 3.54it/s] 34%|███▍ | 126720/371472 [10:04:37<19:26:21, 3.50it/s] {'loss': 3.2372, 'learning_rate': 6.933030231205155e-07, 'epoch': 5.46} + 34%|███▍ | 126720/371472 [10:04:37<19:26:21, 3.50it/s] 34%|███▍ | 126721/371472 [10:04:37<19:07:02, 3.56it/s] 34%|███▍ | 126722/371472 [10:04:37<18:15:02, 3.73it/s] 34%|███▍ | 126723/371472 [10:04:38<18:50:58, 3.61it/s] 34%|███▍ | 126724/371472 [10:04:38<18:42:35, 3.63it/s] 34%|███▍ | 126725/371472 [10:04:38<19:57:42, 3.41it/s] 34%|███▍ | 126726/371472 [10:04:39<20:15:01, 3.36it/s] 34%|███▍ | 126727/371472 [10:04:39<20:06:28, 3.38it/s] 34%|███▍ | 126728/371472 [10:04:39<20:55:27, 3.25it/s] 34%|███▍ | 126729/371472 [10:04:40<21:18:09, 3.19it/s] 34%|███▍ | 126730/371472 [10:04:40<21:30:25, 3.16it/s] 34%|███▍ | 126731/371472 [10:04:40<20:28:24, 3.32it/s] 34%|███▍ | 126732/371472 [10:04:40<19:17:50, 3.52it/s] 34%|███▍ | 126733/371472 [10:04:41<18:13:51, 3.73it/s] 34%|███▍ | 126734/371472 [10:04:41<18:09:03, 3.75it/s] 34%|███▍ | 126735/371472 [10:04:41<18:09:24, 3.74it/s] 34%|███▍ | 126736/371472 [10:04:41<17:50:06, 3.81it/s] 34%|███▍ | 126737/371472 [10:04:42<19:32:15, 3.48it/s] 34%|███▍ | 126738/371472 [10:04:42<19:34:31, 3.47it/s] 34%|███▍ | 126739/371472 [10:04:42<20:31:06, 3.31it/s] 34%|███▍ | 126740/371472 [10:04:43<21:27:01, 3.17it/s] {'loss': 3.2105, 'learning_rate': 6.932545411450364e-07, 'epoch': 5.46} + 34%|███▍ | 126740/371472 [10:04:43<21:27:01, 3.17it/s] 34%|███▍ | 126741/371472 [10:04:43<21:53:27, 3.11it/s] 34%|███▍ | 126742/371472 [10:04:43<21:55:04, 3.10it/s] 34%|███▍ | 126743/371472 [10:04:44<20:58:32, 3.24it/s] 34%|███▍ | 126744/371472 [10:04:44<19:48:31, 3.43it/s] 34%|███▍ | 126745/371472 [10:04:44<19:06:40, 3.56it/s] 34%|███▍ | 126746/371472 [10:04:45<19:06:59, 3.56it/s] 34%|███▍ | 126747/371472 [10:04:45<19:43:21, 3.45it/s] 34%|███▍ | 126748/371472 [10:04:45<20:52:47, 3.26it/s] 34%|███▍ | 126749/371472 [10:04:45<20:14:18, 3.36it/s] 34%|███▍ | 126750/371472 [10:04:46<19:35:05, 3.47it/s] 34%|███▍ | 126751/371472 [10:04:46<20:19:13, 3.35it/s] 34%|███▍ | 126752/371472 [10:04:46<19:47:16, 3.44it/s] 34%|███▍ | 126753/371472 [10:04:47<18:54:01, 3.60it/s] 34%|███▍ | 126754/371472 [10:04:47<18:38:35, 3.65it/s] 34%|███▍ | 126755/371472 [10:04:47<17:56:19, 3.79it/s] 34%|███▍ | 126756/371472 [10:04:47<18:37:27, 3.65it/s] 34%|███▍ | 126757/371472 [10:04:48<18:02:28, 3.77it/s] 34%|███▍ | 126758/371472 [10:04:48<17:56:56, 3.79it/s] 34%|███▍ | 126759/371472 [10:04:48<17:53:17, 3.80it/s] 34%|███▍ | 126760/371472 [10:04:48<18:13:39, 3.73it/s] {'loss': 3.1491, 'learning_rate': 6.932060591695575e-07, 'epoch': 5.46} + 34%|███▍ | 126760/371472 [10:04:48<18:13:39, 3.73it/s] 34%|███▍ | 126761/371472 [10:04:49<19:07:56, 3.55it/s] 34%|███▍ | 126762/371472 [10:04:49<19:44:17, 3.44it/s] 34%|███▍ | 126763/371472 [10:04:49<20:16:38, 3.35it/s] 34%|███▍ | 126764/371472 [10:04:50<19:57:02, 3.41it/s] 34%|███▍ | 126765/371472 [10:04:50<20:07:43, 3.38it/s] 34%|███▍ | 126766/371472 [10:04:50<21:11:52, 3.21it/s] 34%|███▍ | 126767/371472 [10:04:51<20:54:26, 3.25it/s] 34%|███▍ | 126768/371472 [10:04:51<20:42:31, 3.28it/s] 34%|███▍ | 126769/371472 [10:04:51<21:01:24, 3.23it/s] 34%|███▍ | 126770/371472 [10:04:52<20:40:56, 3.29it/s] 34%|███▍ | 126771/371472 [10:04:52<21:00:23, 3.24it/s] 34%|███▍ | 126772/371472 [10:04:52<20:19:01, 3.35it/s] 34%|███▍ | 126773/371472 [10:04:52<21:49:51, 3.11it/s] 34%|███▍ | 126774/371472 [10:04:53<21:18:40, 3.19it/s] 34%|███▍ | 126775/371472 [10:04:53<21:47:38, 3.12it/s] 34%|███▍ | 126776/371472 [10:04:53<20:40:42, 3.29it/s] 34%|███▍ | 126777/371472 [10:04:54<19:10:46, 3.54it/s] 34%|███▍ | 126778/371472 [10:04:54<18:22:33, 3.70it/s] 34%|███▍ | 126779/371472 [10:04:54<18:27:40, 3.68it/s] 34%|███▍ | 126780/371472 [10:04:54<18:04:30, 3.76it/s] {'loss': 3.1326, 'learning_rate': 6.931575771940788e-07, 'epoch': 5.46} + 34%|███▍ | 126780/371472 [10:04:54<18:04:30, 3.76it/s] 34%|███▍ | 126781/371472 [10:04:55<19:30:28, 3.48it/s] 34%|███▍ | 126782/371472 [10:04:55<19:01:00, 3.57it/s] 34%|███▍ | 126783/371472 [10:04:55<18:24:56, 3.69it/s] 34%|███▍ | 126784/371472 [10:04:56<19:14:13, 3.53it/s] 34%|███▍ | 126785/371472 [10:04:56<20:25:59, 3.33it/s] 34%|███▍ | 126786/371472 [10:04:56<19:16:28, 3.53it/s] 34%|███▍ | 126787/371472 [10:04:56<18:47:04, 3.62it/s] 34%|███▍ | 126788/371472 [10:04:57<19:34:00, 3.47it/s] 34%|███▍ | 126789/371472 [10:04:57<20:56:21, 3.25it/s] 34%|███▍ | 126790/371472 [10:04:57<21:07:59, 3.22it/s] 34%|███▍ | 126791/371472 [10:04:58<20:13:18, 3.36it/s] 34%|███▍ | 126792/371472 [10:04:58<19:51:06, 3.42it/s] 34%|███▍ | 126793/371472 [10:04:58<20:18:38, 3.35it/s] 34%|███▍ | 126794/371472 [10:04:59<20:42:27, 3.28it/s] 34%|███▍ | 126795/371472 [10:04:59<19:46:03, 3.44it/s] 34%|███▍ | 126796/371472 [10:04:59<19:39:40, 3.46it/s] 34%|███▍ | 126797/371472 [10:04:59<19:30:57, 3.48it/s] 34%|███▍ | 126798/371472 [10:05:00<19:26:45, 3.50it/s] 34%|███▍ | 126799/371472 [10:05:00<19:22:09, 3.51it/s] 34%|███▍ | 126800/371472 [10:05:00<20:50:29, 3.26it/s] {'loss': 3.0921, 'learning_rate': 6.931090952185999e-07, 'epoch': 5.46} + 34%|███▍ | 126800/371472 [10:05:00<20:50:29, 3.26it/s] 34%|███▍ | 126801/371472 [10:05:01<23:29:36, 2.89it/s] 34%|███▍ | 126802/371472 [10:05:01<23:41:40, 2.87it/s] 34%|███▍ | 126803/371472 [10:05:01<21:48:36, 3.12it/s] 34%|███▍ | 126804/371472 [10:05:02<21:16:10, 3.20it/s] 34%|███▍ | 126805/371472 [10:05:02<19:49:45, 3.43it/s] 34%|███▍ | 126806/371472 [10:05:02<18:35:53, 3.65it/s] 34%|███▍ | 126807/371472 [10:05:02<19:54:42, 3.41it/s] 34%|███▍ | 126808/371472 [10:05:03<19:21:14, 3.51it/s] 34%|███▍ | 126809/371472 [10:05:03<19:51:47, 3.42it/s] 34%|███▍ | 126810/371472 [10:05:03<19:38:46, 3.46it/s] 34%|███▍ | 126811/371472 [10:05:04<19:56:17, 3.41it/s] 34%|███▍ | 126812/371472 [10:05:04<20:37:53, 3.29it/s] 34%|███▍ | 126813/371472 [10:05:04<19:46:20, 3.44it/s] 34%|███▍ | 126814/371472 [10:05:05<20:35:36, 3.30it/s] 34%|███▍ | 126815/371472 [10:05:05<19:26:24, 3.50it/s] 34%|███▍ | 126816/371472 [10:05:05<27:15:42, 2.49it/s] 34%|███▍ | 126817/371472 [10:05:06<24:40:36, 2.75it/s] 34%|███▍ | 126818/371472 [10:05:06<23:06:02, 2.94it/s] 34%|███▍ | 126819/371472 [10:05:06<22:00:02, 3.09it/s] 34%|███▍ | 126820/371472 [10:05:07<22:09:22, 3.07it/s] {'loss': 2.9035, 'learning_rate': 6.930606132431209e-07, 'epoch': 5.46} + 34%|███▍ | 126820/371472 [10:05:07<22:09:22, 3.07it/s] 34%|███▍ | 126821/371472 [10:05:07<22:16:27, 3.05it/s] 34%|███▍ | 126822/371472 [10:05:07<22:19:05, 3.04it/s] 34%|███▍ | 126823/371472 [10:05:08<20:48:57, 3.26it/s] 34%|███▍ | 126824/371472 [10:05:08<20:12:33, 3.36it/s] 34%|███▍ | 126825/371472 [10:05:08<19:37:39, 3.46it/s] 34%|███▍ | 126826/371472 [10:05:08<20:06:54, 3.38it/s] 34%|███▍ | 126827/371472 [10:05:09<19:29:43, 3.49it/s] 34%|███▍ | 126828/371472 [10:05:09<19:30:46, 3.48it/s] 34%|███▍ | 126829/371472 [10:05:09<20:07:41, 3.38it/s] 34%|███▍ | 126830/371472 [10:05:10<20:13:46, 3.36it/s] 34%|███▍ | 126831/371472 [10:05:10<20:27:20, 3.32it/s] 34%|███▍ | 126832/371472 [10:05:10<22:46:30, 2.98it/s] 34%|███▍ | 126833/371472 [10:05:11<21:18:57, 3.19it/s] 34%|███▍ | 126834/371472 [10:05:11<20:35:30, 3.30it/s] 34%|███▍ | 126835/371472 [10:05:11<19:50:04, 3.43it/s] 34%|███▍ | 126836/371472 [10:05:11<20:19:53, 3.34it/s] 34%|███▍ | 126837/371472 [10:05:12<21:11:33, 3.21it/s] 34%|███▍ | 126838/371472 [10:05:12<20:29:28, 3.32it/s] 34%|███▍ | 126839/371472 [10:05:12<19:51:28, 3.42it/s] 34%|███▍ | 126840/371472 [10:05:13<19:23:05, 3.51it/s] {'loss': 3.2621, 'learning_rate': 6.93012131267642e-07, 'epoch': 5.46} + 34%|███▍ | 126840/371472 [10:05:13<19:23:05, 3.51it/s] 34%|███▍ | 126841/371472 [10:05:13<19:40:45, 3.45it/s] 34%|███▍ | 126842/371472 [10:05:13<19:48:48, 3.43it/s] 34%|███▍ | 126843/371472 [10:05:13<19:24:47, 3.50it/s] 34%|███▍ | 126844/371472 [10:05:14<19:13:00, 3.54it/s] 34%|███▍ | 126845/371472 [10:05:14<18:31:02, 3.67it/s] 34%|███▍ | 126846/371472 [10:05:14<18:28:22, 3.68it/s] 34%|███▍ | 126847/371472 [10:05:15<20:50:22, 3.26it/s] 34%|███▍ | 126848/371472 [10:05:15<19:56:08, 3.41it/s] 34%|███▍ | 126849/371472 [10:05:15<19:30:13, 3.48it/s] 34%|███▍ | 126850/371472 [10:05:15<18:43:56, 3.63it/s] 34%|███▍ | 126851/371472 [10:05:16<18:53:53, 3.60it/s] 34%|███▍ | 126852/371472 [10:05:16<19:42:08, 3.45it/s] 34%|███▍ | 126853/371472 [10:05:16<20:55:58, 3.25it/s] 34%|███▍ | 126854/371472 [10:05:17<20:16:18, 3.35it/s] 34%|███▍ | 126855/371472 [10:05:17<19:05:38, 3.56it/s] 34%|███▍ | 126856/371472 [10:05:17<18:38:13, 3.65it/s] 34%|███▍ | 126857/371472 [10:05:17<19:06:35, 3.56it/s] 34%|███▍ | 126858/371472 [10:05:18<18:41:54, 3.63it/s] 34%|███▍ | 126859/371472 [10:05:18<18:32:47, 3.66it/s] 34%|███▍ | 126860/371472 [10:05:18<20:21:36, 3.34it/s] {'loss': 3.3112, 'learning_rate': 6.929636492921631e-07, 'epoch': 5.46} + 34%|███▍ | 126860/371472 [10:05:18<20:21:36, 3.34it/s] 34%|███▍ | 126861/371472 [10:05:19<19:51:12, 3.42it/s] 34%|███▍ | 126862/371472 [10:05:19<20:38:42, 3.29it/s] 34%|███▍ | 126863/371472 [10:05:19<19:36:30, 3.47it/s] 34%|███▍ | 126864/371472 [10:05:19<19:40:10, 3.45it/s] 34%|███▍ | 126865/371472 [10:05:20<19:04:32, 3.56it/s] 34%|███▍ | 126866/371472 [10:05:20<20:41:42, 3.28it/s] 34%|███▍ | 126867/371472 [10:05:20<19:41:41, 3.45it/s] 34%|███▍ | 126868/371472 [10:05:21<18:54:29, 3.59it/s] 34%|███▍ | 126869/371472 [10:05:21<18:29:42, 3.67it/s] 34%|███▍ | 126870/371472 [10:05:21<18:00:42, 3.77it/s] 34%|███▍ | 126871/371472 [10:05:21<18:28:04, 3.68it/s] 34%|███▍ | 126872/371472 [10:05:22<18:03:11, 3.76it/s] 34%|███▍ | 126873/371472 [10:05:22<18:21:40, 3.70it/s] 34%|███▍ | 126874/371472 [10:05:22<18:50:33, 3.61it/s] 34%|███▍ | 126875/371472 [10:05:22<18:04:31, 3.76it/s] 34%|███▍ | 126876/371472 [10:05:23<19:37:52, 3.46it/s] 34%|███▍ | 126877/371472 [10:05:23<19:32:36, 3.48it/s] 34%|███▍ | 126878/371472 [10:05:23<18:51:09, 3.60it/s] 34%|███▍ | 126879/371472 [10:05:24<18:31:52, 3.67it/s] 34%|███▍ | 126880/371472 [10:05:24<18:21:41, 3.70it/s] {'loss': 3.2661, 'learning_rate': 6.929151673166843e-07, 'epoch': 5.46} + 34%|███▍ | 126880/371472 [10:05:24<18:21:41, 3.70it/s] 34%|███▍ | 126881/371472 [10:05:24<18:15:38, 3.72it/s] 34%|███▍ | 126882/371472 [10:05:24<19:00:10, 3.58it/s] 34%|███▍ | 126883/371472 [10:05:25<19:59:03, 3.40it/s] 34%|███▍ | 126884/371472 [10:05:25<19:37:44, 3.46it/s] 34%|███▍ | 126885/371472 [10:05:25<18:40:57, 3.64it/s] 34%|███▍ | 126886/371472 [10:05:26<19:19:55, 3.51it/s] 34%|███▍ | 126887/371472 [10:05:26<20:05:46, 3.38it/s] 34%|███▍ | 126888/371472 [10:05:26<21:39:07, 3.14it/s] 34%|███▍ | 126889/371472 [10:05:27<20:28:28, 3.32it/s] 34%|███▍ | 126890/371472 [10:05:27<19:34:28, 3.47it/s] 34%|███▍ | 126891/371472 [10:05:27<18:32:41, 3.66it/s] 34%|███▍ | 126892/371472 [10:05:27<18:11:03, 3.74it/s] 34%|███▍ | 126893/371472 [10:05:28<18:57:44, 3.58it/s] 34%|███▍ | 126894/371472 [10:05:28<19:53:45, 3.41it/s] 34%|███▍ | 126895/371472 [10:05:28<19:58:14, 3.40it/s] 34%|███▍ | 126896/371472 [10:05:29<20:18:10, 3.35it/s] 34%|███▍ | 126897/371472 [10:05:29<19:28:45, 3.49it/s] 34%|███▍ | 126898/371472 [10:05:29<18:54:14, 3.59it/s] 34%|███▍ | 126899/371472 [10:05:29<19:36:45, 3.46it/s] 34%|███▍ | 126900/371472 [10:05:30<19:05:09, 3.56it/s] {'loss': 3.2265, 'learning_rate': 6.928666853412053e-07, 'epoch': 5.47} + 34%|███▍ | 126900/371472 [10:05:30<19:05:09, 3.56it/s] 34%|███▍ | 126901/371472 [10:05:30<18:17:48, 3.71it/s] 34%|███▍ | 126902/371472 [10:05:30<18:12:22, 3.73it/s] 34%|███▍ | 126903/371472 [10:05:30<18:07:38, 3.75it/s] 34%|███▍ | 126904/371472 [10:05:31<18:25:20, 3.69it/s] 34%|███▍ | 126905/371472 [10:05:31<18:08:14, 3.75it/s] 34%|███▍ | 126906/371472 [10:05:31<18:22:37, 3.70it/s] 34%|███▍ | 126907/371472 [10:05:31<18:14:00, 3.73it/s] 34%|███▍ | 126908/371472 [10:05:32<17:59:32, 3.78it/s] 34%|███▍ | 126909/371472 [10:05:32<19:10:46, 3.54it/s] 34%|███▍ | 126910/371472 [10:05:32<18:48:51, 3.61it/s] 34%|███▍ | 126911/371472 [10:05:33<20:39:46, 3.29it/s] 34%|███▍ | 126912/371472 [10:05:33<20:24:38, 3.33it/s] 34%|███▍ | 126913/371472 [10:05:33<20:01:46, 3.39it/s] 34%|███▍ | 126914/371472 [10:05:34<20:50:20, 3.26it/s] 34%|███▍ | 126915/371472 [10:05:34<20:28:11, 3.32it/s] 34%|███▍ | 126916/371472 [10:05:34<19:49:44, 3.43it/s] 34%|███▍ | 126917/371472 [10:05:34<20:08:13, 3.37it/s] 34%|███▍ | 126918/371472 [10:05:35<19:10:35, 3.54it/s] 34%|███▍ | 126919/371472 [10:05:35<19:21:22, 3.51it/s] 34%|███▍ | 126920/371472 [10:05:35<18:39:42, 3.64it/s] {'loss': 3.3469, 'learning_rate': 6.928182033657264e-07, 'epoch': 5.47} + 34%|███▍ | 126920/371472 [10:05:35<18:39:42, 3.64it/s] 34%|███▍ | 126921/371472 [10:05:36<18:47:32, 3.61it/s] 34%|███▍ | 126922/371472 [10:05:36<18:48:51, 3.61it/s] 34%|███▍ | 126923/371472 [10:05:36<18:48:27, 3.61it/s] 34%|███▍ | 126924/371472 [10:05:36<18:26:01, 3.69it/s] 34%|███▍ | 126925/371472 [10:05:37<18:00:00, 3.77it/s] 34%|███▍ | 126926/371472 [10:05:37<17:43:45, 3.83it/s] 34%|███▍ | 126927/371472 [10:05:37<17:52:22, 3.80it/s] 34%|███▍ | 126928/371472 [10:05:37<17:33:48, 3.87it/s] 34%|███▍ | 126929/371472 [10:05:38<17:15:10, 3.94it/s] 34%|███▍ | 126930/371472 [10:05:38<17:37:27, 3.85it/s] 34%|███▍ | 126931/371472 [10:05:38<17:53:43, 3.80it/s] 34%|███▍ | 126932/371472 [10:05:38<19:02:31, 3.57it/s] 34%|███▍ | 126933/371472 [10:05:39<18:40:35, 3.64it/s] 34%|███▍ | 126934/371472 [10:05:39<19:42:26, 3.45it/s] 34%|███▍ | 126935/371472 [10:05:39<19:24:30, 3.50it/s] 34%|███▍ | 126936/371472 [10:05:40<19:17:51, 3.52it/s] 34%|███▍ | 126937/371472 [10:05:40<19:59:37, 3.40it/s] 34%|███▍ | 126938/371472 [10:05:40<19:42:11, 3.45it/s] 34%|███▍ | 126939/371472 [10:05:41<20:48:56, 3.26it/s] 34%|███▍ | 126940/371472 [10:05:41<20:07:17, 3.38it/s] {'loss': 3.2671, 'learning_rate': 6.927697213902476e-07, 'epoch': 5.47} + 34%|███▍ | 126940/371472 [10:05:41<20:07:17, 3.38it/s] 34%|███▍ | 126941/371472 [10:05:41<21:22:59, 3.18it/s] 34%|███▍ | 126942/371472 [10:05:42<21:01:21, 3.23it/s] 34%|███▍ | 126943/371472 [10:05:42<21:53:17, 3.10it/s] 34%|███▍ | 126944/371472 [10:05:42<20:33:31, 3.30it/s] 34%|███▍ | 126945/371472 [10:05:42<19:07:57, 3.55it/s] 34%|███▍ | 126946/371472 [10:05:43<18:40:39, 3.64it/s] 34%|███▍ | 126947/371472 [10:05:43<18:34:28, 3.66it/s] 34%|███▍ | 126948/371472 [10:05:43<18:16:53, 3.72it/s] 34%|███▍ | 126949/371472 [10:05:43<18:07:33, 3.75it/s] 34%|███▍ | 126950/371472 [10:05:44<20:33:14, 3.30it/s] 34%|███▍ | 126951/371472 [10:05:44<19:59:02, 3.40it/s] 34%|███▍ | 126952/371472 [10:05:44<18:51:19, 3.60it/s] 34%|███▍ | 126953/371472 [10:05:45<18:36:56, 3.65it/s] 34%|███▍ | 126954/371472 [10:05:45<18:15:57, 3.72it/s] 34%|███▍ | 126955/371472 [10:05:45<18:28:37, 3.68it/s] 34%|███▍ | 126956/371472 [10:05:45<18:34:47, 3.66it/s] 34%|███▍ | 126957/371472 [10:05:46<18:16:05, 3.72it/s] 34%|███▍ | 126958/371472 [10:05:46<17:42:04, 3.84it/s] 34%|███▍ | 126959/371472 [10:05:46<17:12:43, 3.95it/s] 34%|███▍ | 126960/371472 [10:05:46<17:57:52, 3.78it/s] {'loss': 3.1797, 'learning_rate': 6.927212394147688e-07, 'epoch': 5.47} + 34%|███▍ | 126960/371472 [10:05:46<17:57:52, 3.78it/s] 34%|███▍ | 126961/371472 [10:05:47<18:39:14, 3.64it/s] 34%|███▍ | 126962/371472 [10:05:47<18:06:43, 3.75it/s] 34%|███▍ | 126963/371472 [10:05:47<17:30:48, 3.88it/s] 34%|███▍ | 126964/371472 [10:05:48<19:15:43, 3.53it/s] 34%|███▍ | 126965/371472 [10:05:48<19:07:22, 3.55it/s] 34%|███▍ | 126966/371472 [10:05:48<18:16:23, 3.72it/s] 34%|███▍ | 126967/371472 [10:05:48<18:59:42, 3.58it/s] 34%|███▍ | 126968/371472 [10:05:49<18:40:12, 3.64it/s] 34%|███▍ | 126969/371472 [10:05:49<18:26:21, 3.68it/s] 34%|███▍ | 126970/371472 [10:05:49<18:46:34, 3.62it/s] 34%|███▍ | 126971/371472 [10:05:49<18:58:49, 3.58it/s] 34%|███▍ | 126972/371472 [10:05:50<18:58:10, 3.58it/s] 34%|███▍ | 126973/371472 [10:05:50<18:13:34, 3.73it/s] 34%|███▍ | 126974/371472 [10:05:50<18:10:19, 3.74it/s] 34%|███▍ | 126975/371472 [10:05:50<17:34:10, 3.87it/s] 34%|███▍ | 126976/371472 [10:05:51<17:28:18, 3.89it/s] 34%|███▍ | 126977/371472 [10:05:51<17:11:24, 3.95it/s] 34%|███▍ | 126978/371472 [10:05:51<17:10:40, 3.95it/s] 34%|███▍ | 126979/371472 [10:05:52<17:49:32, 3.81it/s] 34%|███▍ | 126980/371472 [10:05:52<17:37:30, 3.85it/s] {'loss': 3.3644, 'learning_rate': 6.926727574392897e-07, 'epoch': 5.47} + 34%|███▍ | 126980/371472 [10:05:52<17:37:30, 3.85it/s] 34%|███▍ | 126981/371472 [10:05:52<19:12:39, 3.54it/s] 34%|███▍ | 126982/371472 [10:05:52<18:53:10, 3.60it/s] 34%|███▍ | 126983/371472 [10:05:53<18:16:59, 3.71it/s] 34%|███▍ | 126984/371472 [10:05:53<18:31:43, 3.67it/s] 34%|███▍ | 126985/371472 [10:05:53<18:01:36, 3.77it/s] 34%|███▍ | 126986/371472 [10:05:53<18:04:54, 3.76it/s] 34%|███▍ | 126987/371472 [10:05:54<18:27:44, 3.68it/s] 34%|███▍ | 126988/371472 [10:05:54<19:18:34, 3.52it/s] 34%|██���▍ | 126989/371472 [10:05:54<19:53:16, 3.41it/s] 34%|███▍ | 126990/371472 [10:05:55<19:26:33, 3.49it/s] 34%|███▍ | 126991/371472 [10:05:55<18:48:23, 3.61it/s] 34%|███▍ | 126992/371472 [10:05:55<18:34:07, 3.66it/s] 34%|███▍ | 126993/371472 [10:05:55<18:40:43, 3.64it/s] 34%|███▍ | 126994/371472 [10:05:56<19:54:57, 3.41it/s] 34%|███▍ | 126995/371472 [10:05:56<18:46:15, 3.62it/s] 34%|███▍ | 126996/371472 [10:05:56<19:46:01, 3.44it/s] 34%|███▍ | 126997/371472 [10:05:57<20:42:20, 3.28it/s] 34%|███▍ | 126998/371472 [10:05:57<19:51:58, 3.42it/s] 34%|███▍ | 126999/371472 [10:05:57<20:00:36, 3.39it/s] 34%|███▍ | 127000/371472 [10:05:57<20:07:50, 3.37it/s] {'loss': 3.188, 'learning_rate': 6.926242754638108e-07, 'epoch': 5.47} + 34%|███▍ | 127000/371472 [10:05:57<20:07:50, 3.37it/s] 34%|███▍ | 127001/371472 [10:05:58<19:47:58, 3.43it/s] 34%|███▍ | 127002/371472 [10:05:58<19:33:46, 3.47it/s] 34%|███▍ | 127003/371472 [10:05:58<19:02:24, 3.57it/s] 34%|███▍ | 127004/371472 [10:05:59<18:14:06, 3.72it/s] 34%|███▍ | 127005/371472 [10:05:59<18:15:06, 3.72it/s] 34%|███▍ | 127006/371472 [10:05:59<19:38:56, 3.46it/s] 34%|███▍ | 127007/371472 [10:05:59<19:03:03, 3.56it/s] 34%|███▍ | 127008/371472 [10:06:00<18:47:36, 3.61it/s] 34%|███▍ | 127009/371472 [10:06:00<18:33:24, 3.66it/s] 34%|███▍ | 127010/371472 [10:06:00<19:13:58, 3.53it/s] 34%|███▍ | 127011/371472 [10:06:01<19:24:55, 3.50it/s] 34%|███▍ | 127012/371472 [10:06:01<19:26:27, 3.49it/s] 34%|███▍ | 127013/371472 [10:06:01<18:48:47, 3.61it/s] 34%|███▍ | 127014/371472 [10:06:01<19:42:13, 3.45it/s] 34%|███▍ | 127015/371472 [10:06:02<19:32:55, 3.47it/s] 34%|███▍ | 127016/371472 [10:06:02<18:35:08, 3.65it/s] 34%|███▍ | 127017/371472 [10:06:02<19:26:26, 3.49it/s] 34%|███▍ | 127018/371472 [10:06:02<18:17:06, 3.71it/s] 34%|███▍ | 127019/371472 [10:06:03<19:02:22, 3.57it/s] 34%|███▍ | 127020/371472 [10:06:03<19:11:48, 3.54it/s] {'loss': 3.1395, 'learning_rate': 6.92575793488332e-07, 'epoch': 5.47} + 34%|███▍ | 127020/371472 [10:06:03<19:11:48, 3.54it/s] 34%|███▍ | 127021/371472 [10:06:03<20:14:44, 3.35it/s] 34%|███▍ | 127022/371472 [10:06:04<18:58:07, 3.58it/s] 34%|███▍ | 127023/371472 [10:06:04<19:55:09, 3.41it/s] 34%|███▍ | 127024/371472 [10:06:04<20:12:37, 3.36it/s] 34%|███▍ | 127025/371472 [10:06:05<19:24:02, 3.50it/s] 34%|███▍ | 127026/371472 [10:06:05<19:11:43, 3.54it/s] 34%|███▍ | 127027/371472 [10:06:05<18:38:27, 3.64it/s] 34%|███▍ | 127028/371472 [10:06:05<18:36:26, 3.65it/s] 34%|███▍ | 127029/371472 [10:06:06<19:04:39, 3.56it/s] 34%|███▍ | 127030/371472 [10:06:06<20:08:23, 3.37it/s] 34%|███▍ | 127031/371472 [10:06:06<19:29:22, 3.48it/s] 34%|███▍ | 127032/371472 [10:06:07<19:33:23, 3.47it/s] 34%|███▍ | 127033/371472 [10:06:07<20:56:14, 3.24it/s] 34%|███▍ | 127034/371472 [10:06:07<19:25:28, 3.50it/s] 34%|███▍ | 127035/371472 [10:06:07<18:44:14, 3.62it/s] 34%|███▍ | 127036/371472 [10:06:08<18:10:30, 3.74it/s] 34%|███▍ | 127037/371472 [10:06:08<19:20:45, 3.51it/s] 34%|███▍ | 127038/371472 [10:06:08<18:59:49, 3.57it/s] 34%|███▍ | 127039/371472 [10:06:09<19:11:17, 3.54it/s] 34%|███▍ | 127040/371472 [10:06:09<19:05:14, 3.56it/s] {'loss': 3.2592, 'learning_rate': 6.925273115128532e-07, 'epoch': 5.47} + 34%|███▍ | 127040/371472 [10:06:09<19:05:14, 3.56it/s] 34%|███▍ | 127041/371472 [10:06:09<19:41:49, 3.45it/s] 34%|███▍ | 127042/371472 [10:06:09<18:57:14, 3.58it/s] 34%|███▍ | 127043/371472 [10:06:10<19:57:13, 3.40it/s] 34%|███▍ | 127044/371472 [10:06:10<19:09:54, 3.54it/s] 34%|███▍ | 127045/371472 [10:06:10<18:44:43, 3.62it/s] 34%|███▍ | 127046/371472 [10:06:10<17:55:01, 3.79it/s] 34%|███▍ | 127047/371472 [10:06:11<18:37:16, 3.65it/s] 34%|███▍ | 127048/371472 [10:06:11<18:16:30, 3.72it/s] 34%|███▍ | 127049/371472 [10:06:11<17:32:20, 3.87it/s] 34%|███▍ | 127050/371472 [10:06:12<18:29:40, 3.67it/s] 34%|███▍ | 127051/371472 [10:06:12<18:00:28, 3.77it/s] 34%|███▍ | 127052/371472 [10:06:12<17:44:53, 3.83it/s] 34%|███▍ | 127053/371472 [10:06:12<17:50:41, 3.80it/s] 34%|███▍ | 127054/371472 [10:06:13<17:46:59, 3.82it/s] 34%|███▍ | 127055/371472 [10:06:13<17:10:10, 3.95it/s] 34%|███▍ | 127056/371472 [10:06:13<17:45:15, 3.82it/s] 34%|███▍ | 127057/371472 [10:06:13<18:14:24, 3.72it/s] 34%|███▍ | 127058/371472 [10:06:14<18:05:14, 3.75it/s] 34%|███▍ | 127059/371472 [10:06:14<17:42:28, 3.83it/s] 34%|███▍ | 127060/371472 [10:06:14<18:01:36, 3.77it/s] {'loss': 3.2996, 'learning_rate': 6.924788295373742e-07, 'epoch': 5.47} + 34%|███▍ | 127060/371472 [10:06:14<18:01:36, 3.77it/s] 34%|███▍ | 127061/371472 [10:06:14<17:52:18, 3.80it/s] 34%|███▍ | 127062/371472 [10:06:15<23:30:56, 2.89it/s] 34%|███▍ | 127063/371472 [10:06:15<21:59:12, 3.09it/s] 34%|███▍ | 127064/371472 [10:06:15<20:47:59, 3.26it/s] 34%|███▍ | 127065/371472 [10:06:16<21:01:54, 3.23it/s] 34%|███▍ | 127066/371472 [10:06:16<20:05:01, 3.38it/s] 34%|███▍ | 127067/371472 [10:06:16<19:29:50, 3.48it/s] 34%|███▍ | 127068/371472 [10:06:17<18:46:44, 3.62it/s] 34%|███▍ | 127069/371472 [10:06:17<18:10:07, 3.74it/s] 34%|███▍ | 127070/371472 [10:06:17<18:28:23, 3.68it/s] 34%|███▍ | 127071/371472 [10:06:17<18:30:49, 3.67it/s] 34%|███▍ | 127072/371472 [10:06:18<18:07:13, 3.75it/s] 34%|███▍ | 127073/371472 [10:06:18<18:28:18, 3.68it/s] 34%|███▍ | 127074/371472 [10:06:18<18:26:26, 3.68it/s] 34%|███▍ | 127075/371472 [10:06:18<18:59:31, 3.57it/s] 34%|███▍ | 127076/371472 [10:06:19<18:39:01, 3.64it/s] 34%|███▍ | 127077/371472 [10:06:19<18:40:48, 3.63it/s] 34%|███▍ | 127078/371472 [10:06:19<18:09:49, 3.74it/s] 34%|███▍ | 127079/371472 [10:06:20<18:23:00, 3.69it/s] 34%|███▍ | 127080/371472 [10:06:20<18:33:22, 3.66it/s] {'loss': 3.1774, 'learning_rate': 6.924303475618953e-07, 'epoch': 5.47} + 34%|███▍ | 127080/371472 [10:06:20<18:33:22, 3.66it/s] 34%|███▍ | 127081/371472 [10:06:20<18:23:07, 3.69it/s] 34%|███▍ | 127082/371472 [10:06:20<18:07:38, 3.74it/s] 34%|███▍ | 127083/371472 [10:06:21<18:57:43, 3.58it/s] 34%|███▍ | 127084/371472 [10:06:21<18:39:29, 3.64it/s] 34%|███▍ | 127085/371472 [10:06:21<18:34:23, 3.66it/s] 34%|███▍ | 127086/371472 [10:06:21<18:58:42, 3.58it/s] 34%|███▍ | 127087/371472 [10:06:22<18:56:08, 3.59it/s] 34%|███▍ | 127088/371472 [10:06:22<18:51:31, 3.60it/s] 34%|███▍ | 127089/371472 [10:06:22<18:26:10, 3.68it/s] 34%|███▍ | 127090/371472 [10:06:23<18:10:44, 3.73it/s] 34%|███▍ | 127091/371472 [10:06:23<18:37:00, 3.65it/s] 34%|███▍ | 127092/371472 [10:06:23<18:54:17, 3.59it/s] 34%|███▍ | 127093/371472 [10:06:23<18:52:21, 3.60it/s] 34%|███▍ | 127094/371472 [10:06:24<18:53:41, 3.59it/s] 34%|███▍ | 127095/371472 [10:06:24<18:28:29, 3.67it/s] 34%|███▍ | 127096/371472 [10:06:24<18:46:01, 3.62it/s] 34%|███▍ | 127097/371472 [10:06:24<18:04:23, 3.76it/s] 34%|███▍ | 127098/371472 [10:06:25<20:45:10, 3.27it/s] 34%|███▍ | 127099/371472 [10:06:25<20:03:40, 3.38it/s] 34%|███▍ | 127100/371472 [10:06:25<19:08:53, 3.55it/s] {'loss': 3.2721, 'learning_rate': 6.923818655864165e-07, 'epoch': 5.47} + 34%|███▍ | 127100/371472 [10:06:25<19:08:53, 3.55it/s] 34%|███▍ | 127101/371472 [10:06:26<20:07:06, 3.37it/s] 34%|███▍ | 127102/371472 [10:06:26<20:51:28, 3.25it/s] 34%|███▍ | 127103/371472 [10:06:26<21:24:15, 3.17it/s] 34%|███▍ | 127104/371472 [10:06:27<21:09:47, 3.21it/s] 34%|███▍ | 127105/371472 [10:06:27<20:50:22, 3.26it/s] 34%|███▍ | 127106/371472 [10:06:27<20:23:22, 3.33it/s] 34%|███▍ | 127107/371472 [10:06:28<19:59:22, 3.40it/s] 34%|███▍ | 127108/371472 [10:06:28<19:49:11, 3.42it/s] 34%|███▍ | 127109/371472 [10:06:28<19:02:52, 3.56it/s] 34%|███▍ | 127110/371472 [10:06:28<20:42:08, 3.28it/s] 34%|███▍ | 127111/371472 [10:06:29<20:04:39, 3.38it/s] 34%|███▍ | 127112/371472 [10:06:29<20:01:20, 3.39it/s] 34%|███▍ | 127113/371472 [10:06:29<20:28:24, 3.32it/s] 34%|███▍ | 127114/371472 [10:06:30<19:54:00, 3.41it/s] 34%|███▍ | 127115/371472 [10:06:30<19:16:13, 3.52it/s] 34%|███▍ | 127116/371472 [10:06:30<18:52:22, 3.60it/s] 34%|███▍ | 127117/371472 [10:06:30<19:13:49, 3.53it/s] 34%|███▍ | 127118/371472 [10:06:31<19:46:30, 3.43it/s] 34%|███▍ | 127119/371472 [10:06:31<18:32:41, 3.66it/s] 34%|███▍ | 127120/371472 [10:06:31<17:53:40, 3.79it/s] {'loss': 3.2907, 'learning_rate': 6.923333836109374e-07, 'epoch': 5.48} + 34%|███▍ | 127120/371472 [10:06:31<17:53:40, 3.79it/s] 34%|███▍ | 127121/371472 [10:06:31<17:54:40, 3.79it/s] 34%|███▍ | 127122/371472 [10:06:32<18:20:11, 3.70it/s] 34%|███▍ | 127123/371472 [10:06:32<18:35:25, 3.65it/s] 34%|███▍ | 127124/371472 [10:06:32<18:22:17, 3.69it/s] 34%|███▍ | 127125/371472 [10:06:33<19:15:38, 3.52it/s] 34%|███▍ | 127126/371472 [10:06:33<18:39:21, 3.64it/s] 34%|███▍ | 127127/371472 [10:06:33<17:59:31, 3.77it/s] 34%|███▍ | 127128/371472 [10:06:33<18:16:40, 3.71it/s] 34%|███▍ | 127129/371472 [10:06:34<23:20:00, 2.91it/s] 34%|███▍ | 127130/371472 [10:06:34<22:52:50, 2.97it/s] 34%|███▍ | 127131/371472 [10:06:35<23:28:27, 2.89it/s] 34%|███▍ | 127132/371472 [10:06:35<21:43:22, 3.12it/s] 34%|███▍ | 127133/371472 [10:06:35<24:44:38, 2.74it/s] 34%|███▍ | 127134/371472 [10:06:36<22:33:31, 3.01it/s] 34%|███▍ | 127135/371472 [10:06:36<21:24:58, 3.17it/s] 34%|███▍ | 127136/371472 [10:06:36<20:11:12, 3.36it/s] 34%|███▍ | 127137/371472 [10:06:36<18:56:44, 3.58it/s] 34%|███▍ | 127138/371472 [10:06:37<19:11:43, 3.54it/s] 34%|███▍ | 127139/371472 [10:06:37<18:43:09, 3.63it/s] 34%|███▍ | 127140/371472 [10:06:37<18:35:07, 3.65it/s] {'loss': 3.2786, 'learning_rate': 6.922849016354585e-07, 'epoch': 5.48} + 34%|███▍ | 127140/371472 [10:06:37<18:35:07, 3.65it/s] 34%|███▍ | 127141/371472 [10:06:37<17:57:36, 3.78it/s] 34%|███▍ | 127142/371472 [10:06:38<19:29:58, 3.48it/s] 34%|███▍ | 127143/371472 [10:06:38<20:32:23, 3.30it/s] 34%|███▍ | 127144/371472 [10:06:38<20:47:35, 3.26it/s] 34%|███▍ | 127145/371472 [10:06:39<23:03:01, 2.94it/s] 34%|███▍ | 127146/371472 [10:06:39<21:07:15, 3.21it/s] 34%|███▍ | 127147/371472 [10:06:39<20:29:40, 3.31it/s] 34%|███▍ | 127148/371472 [10:06:40<19:57:19, 3.40it/s] 34%|███▍ | 127149/371472 [10:06:40<19:40:50, 3.45it/s] 34%|███▍ | 127150/371472 [10:06:40<18:39:58, 3.64it/s] 34%|███▍ | 127151/371472 [10:06:40<20:00:53, 3.39it/s] 34%|███▍ | 127152/371472 [10:06:41<20:23:20, 3.33it/s] 34%|███▍ | 127153/371472 [10:06:41<19:25:54, 3.49it/s] 34%|███▍ | 127154/371472 [10:06:41<19:35:46, 3.46it/s] 34%|███▍ | 127155/371472 [10:06:42<18:37:54, 3.64it/s] 34%|███▍ | 127156/371472 [10:06:42<18:14:37, 3.72it/s] 34%|███▍ | 127157/371472 [10:06:42<17:46:13, 3.82it/s] 34%|███▍ | 127158/371472 [10:06:42<17:38:48, 3.85it/s] 34%|███▍ | 127159/371472 [10:06:43<18:57:28, 3.58it/s] 34%|███▍ | 127160/371472 [10:06:43<18:22:57, 3.69it/s] {'loss': 3.3626, 'learning_rate': 6.922364196599797e-07, 'epoch': 5.48} + 34%|███▍ | 127160/371472 [10:06:43<18:22:57, 3.69it/s] 34%|███▍ | 127161/371472 [10:06:43<17:59:58, 3.77it/s] 34%|███▍ | 127162/371472 [10:06:43<17:48:11, 3.81it/s] 34%|███▍ | 127163/371472 [10:06:44<18:07:34, 3.74it/s] 34%|███▍ | 127164/371472 [10:06:44<17:26:47, 3.89it/s] 34%|███▍ | 127165/371472 [10:06:44<18:04:59, 3.75it/s] 34%|███▍ | 127166/371472 [10:06:44<17:53:04, 3.79it/s] 34%|███▍ | 127167/371472 [10:06:45<18:01:32, 3.76it/s] 34%|███▍ | 127168/371472 [10:06:45<17:46:05, 3.82it/s] 34%|███▍ | 127169/371472 [10:06:45<17:21:03, 3.91it/s] 34%|███▍ | 127170/371472 [10:06:45<16:57:22, 4.00it/s] 34%|███▍ | 127171/371472 [10:06:46<18:44:51, 3.62it/s] 34%|███▍ | 127172/371472 [10:06:46<18:39:16, 3.64it/s] 34%|███▍ | 127173/371472 [10:06:46<18:09:22, 3.74it/s] 34%|███▍ | 127174/371472 [10:06:47<18:10:19, 3.73it/s] 34%|███▍ | 127175/371472 [10:06:47<17:56:19, 3.78it/s] 34%|███▍ | 127176/371472 [10:06:47<18:24:22, 3.69it/s] 34%|███▍ | 127177/371472 [10:06:47<19:06:47, 3.55it/s] 34%|███▍ | 127178/371472 [10:06:48<19:07:16, 3.55it/s] 34%|███▍ | 127179/371472 [10:06:48<20:38:23, 3.29it/s] 34%|███▍ | 127180/371472 [10:06:48<20:44:43, 3.27it/s] {'loss': 3.1141, 'learning_rate': 6.921879376845009e-07, 'epoch': 5.48} + 34%|███▍ | 127180/371472 [10:06:48<20:44:43, 3.27it/s] 34%|███▍ | 127181/371472 [10:06:49<19:23:43, 3.50it/s] 34%|███▍ | 127182/371472 [10:06:49<18:55:32, 3.59it/s] 34%|███▍ | 127183/371472 [10:06:49<18:07:56, 3.74it/s] 34%|███▍ | 127184/371472 [10:06:49<18:07:35, 3.74it/s] 34%|███▍ | 127185/371472 [10:06:50<18:52:01, 3.60it/s] 34%|███▍ | 127186/371472 [10:06:50<19:31:57, 3.47it/s] 34%|███▍ | 127187/371472 [10:06:50<19:11:57, 3.53it/s] 34%|███▍ | 127188/371472 [10:06:51<18:39:04, 3.64it/s] 34%|███▍ | 127189/371472 [10:06:51<18:44:29, 3.62it/s] 34%|███▍ | 127190/371472 [10:06:51<19:20:21, 3.51it/s] 34%|███▍ | 127191/371472 [10:06:51<18:22:37, 3.69it/s] 34%|███▍ | 127192/371472 [10:06:52<18:24:24, 3.69it/s] 34%|███▍ | 127193/371472 [10:06:52<19:18:59, 3.51it/s] 34%|███▍ | 127194/371472 [10:06:52<18:45:40, 3.62it/s] 34%|███▍ | 127195/371472 [10:06:53<19:08:42, 3.54it/s] 34%|███▍ | 127196/371472 [10:06:53<19:13:09, 3.53it/s] 34%|███▍ | 127197/371472 [10:06:53<20:19:53, 3.34it/s] 34%|███▍ | 127198/371472 [10:06:53<20:22:49, 3.33it/s] 34%|███▍ | 127199/371472 [10:06:54<19:29:34, 3.48it/s] 34%|███▍ | 127200/371472 [10:06:54<18:52:54, 3.59it/s] {'loss': 3.3689, 'learning_rate': 6.92139455709022e-07, 'epoch': 5.48} + 34%|███▍ | 127200/371472 [10:06:54<18:52:54, 3.59it/s] 34%|███▍ | 127201/371472 [10:06:54<19:00:57, 3.57it/s] 34%|███▍ | 127202/371472 [10:06:55<18:49:35, 3.60it/s] 34%|███▍ | 127203/371472 [10:06:55<18:49:38, 3.60it/s] 34%|███▍ | 127204/371472 [10:06:55<19:55:15, 3.41it/s] 34%|███▍ | 127205/371472 [10:06:55<19:32:01, 3.47it/s] 34%|███▍ | 127206/371472 [10:06:56<19:02:27, 3.56it/s] 34%|███▍ | 127207/371472 [10:06:56<19:12:43, 3.53it/s] 34%|███▍ | 127208/371472 [10:06:56<18:41:56, 3.63it/s] 34%|███▍ | 127209/371472 [10:06:57<19:13:03, 3.53it/s] 34%|███▍ | 127210/371472 [10:06:57<19:13:02, 3.53it/s] 34%|███▍ | 127211/371472 [10:06:57<19:18:45, 3.51it/s] 34%|███▍ | 127212/371472 [10:06:57<18:46:35, 3.61it/s] 34%|███▍ | 127213/371472 [10:06:58<18:57:10, 3.58it/s] 34%|███▍ | 127214/371472 [10:06:58<18:58:43, 3.58it/s] 34%|███▍ | 127215/371472 [10:06:58<19:13:31, 3.53it/s] 34%|███▍ | 127216/371472 [10:06:59<19:37:45, 3.46it/s] 34%|███▍ | 127217/371472 [10:06:59<19:06:07, 3.55it/s] 34%|███▍ | 127218/371472 [10:06:59<18:20:17, 3.70it/s] 34%|███▍ | 127219/371472 [10:06:59<18:12:51, 3.73it/s] 34%|███▍ | 127220/371472 [10:07:00<17:43:14, 3.83it/s] {'loss': 3.1013, 'learning_rate': 6.92090973733543e-07, 'epoch': 5.48} + 34%|███▍ | 127220/371472 [10:07:00<17:43:14, 3.83it/s] 34%|███▍ | 127221/371472 [10:07:00<19:07:31, 3.55it/s] 34%|███▍ | 127222/371472 [10:07:00<18:49:00, 3.61it/s] 34%|███▍ | 127223/371472 [10:07:00<19:32:53, 3.47it/s] 34%|███▍ | 127224/371472 [10:07:01<18:58:57, 3.57it/s] 34%|███▍ | 127225/371472 [10:07:01<18:47:38, 3.61it/s] 34%|███▍ | 127226/371472 [10:07:01<19:23:04, 3.50it/s] 34%|███▍ | 127227/371472 [10:07:02<19:15:54, 3.52it/s] 34%|███▍ | 127228/371472 [10:07:02<20:02:13, 3.39it/s] 34%|███▍ | 127229/371472 [10:07:02<19:28:06, 3.48it/s] 34%|███▍ | 127230/371472 [10:07:02<19:35:51, 3.46it/s] 34%|███▍ | 127231/371472 [10:07:03<18:52:47, 3.59it/s] 34%|███▍ | 127232/371472 [10:07:03<18:23:54, 3.69it/s] 34%|███▍ | 127233/371472 [10:07:03<18:34:52, 3.65it/s] 34%|███▍ | 127234/371472 [10:07:03<17:51:04, 3.80it/s] 34%|███▍ | 127235/371472 [10:07:04<17:30:25, 3.88it/s] 34%|███▍ | 127236/371472 [10:07:04<19:54:45, 3.41it/s] 34%|███▍ | 127237/371472 [10:07:04<19:54:25, 3.41it/s] 34%|███▍ | 127238/371472 [10:07:05<20:28:40, 3.31it/s] 34%|███▍ | 127239/371472 [10:07:05<19:54:38, 3.41it/s] 34%|███▍ | 127240/371472 [10:07:05<20:16:25, 3.35it/s] {'loss': 3.1667, 'learning_rate': 6.920424917580641e-07, 'epoch': 5.48} + 34%|███▍ | 127240/371472 [10:07:05<20:16:25, 3.35it/s] 34%|███▍ | 127241/371472 [10:07:06<21:06:25, 3.21it/s] 34%|███▍ | 127242/371472 [10:07:06<20:37:28, 3.29it/s] 34%|███▍ | 127243/371472 [10:07:06<20:19:44, 3.34it/s] 34%|███▍ | 127244/371472 [10:07:06<19:40:30, 3.45it/s] 34%|███▍ | 127245/371472 [10:07:07<19:00:01, 3.57it/s] 34%|███▍ | 127246/371472 [10:07:07<18:25:20, 3.68it/s] 34%|███▍ | 127247/371472 [10:07:07<18:03:21, 3.76it/s] 34%|███▍ | 127248/371472 [10:07:08<17:55:50, 3.78it/s] 34%|███▍ | 127249/371472 [10:07:08<17:48:24, 3.81it/s] 34%|███▍ | 127250/371472 [10:07:08<17:53:52, 3.79it/s] 34%|███▍ | 127251/371472 [10:07:08<18:22:36, 3.69it/s] 34%|███▍ | 127252/371472 [10:07:09<19:29:56, 3.48it/s] 34%|███▍ | 127253/371472 [10:07:09<19:12:36, 3.53it/s] 34%|███▍ | 127254/371472 [10:07:09<18:41:07, 3.63it/s] 34%|███▍ | 127255/371472 [10:07:09<18:20:52, 3.70it/s] 34%|███▍ | 127256/371472 [10:07:10<17:57:24, 3.78it/s] 34%|███▍ | 127257/371472 [10:07:10<20:18:40, 3.34it/s] 34%|███▍ | 127258/371472 [10:07:10<19:30:44, 3.48it/s] 34%|███▍ | 127259/371472 [10:07:11<19:06:00, 3.55it/s] 34%|███▍ | 127260/371472 [10:07:11<18:56:07, 3.58it/s] {'loss': 3.2935, 'learning_rate': 6.919940097825853e-07, 'epoch': 5.48} + 34%|███▍ | 127260/371472 [10:07:11<18:56:07, 3.58it/s] 34%|███▍ | 127261/371472 [10:07:11<19:02:33, 3.56it/s] 34%|███▍ | 127262/371472 [10:07:11<18:18:30, 3.71it/s] 34%|███▍ | 127263/371472 [10:07:12<19:02:31, 3.56it/s] 34%|███▍ | 127264/371472 [10:07:12<19:20:45, 3.51it/s] 34%|███▍ | 127265/371472 [10:07:12<19:43:57, 3.44it/s] 34%|███▍ | 127266/371472 [10:07:13<19:43:22, 3.44it/s] 34%|███▍ | 127267/371472 [10:07:13<18:48:20, 3.61it/s] 34%|███▍ | 127268/371472 [10:07:13<18:55:34, 3.58it/s] 34%|███▍ | 127269/371472 [10:07:13<17:57:15, 3.78it/s] 34%|███▍ | 127270/371472 [10:07:14<17:46:50, 3.82it/s] 34%|███▍ | 127271/371472 [10:07:14<18:25:40, 3.68it/s] 34%|███▍ | 127272/371472 [10:07:14<18:08:09, 3.74it/s] 34%|███▍ | 127273/371472 [10:07:14<18:51:52, 3.60it/s] 34%|███▍ | 127274/371472 [10:07:15<19:02:11, 3.56it/s] 34%|███▍ | 127275/371472 [10:07:15<19:12:21, 3.53it/s] 34%|███▍ | 127276/371472 [10:07:15<21:46:21, 3.12it/s] 34%|███▍ | 127277/371472 [10:07:16<20:52:35, 3.25it/s] 34%|███▍ | 127278/371472 [10:07:16<20:32:54, 3.30it/s] 34%|███▍ | 127279/371472 [10:07:16<19:23:17, 3.50it/s] 34%|███▍ | 127280/371472 [10:07:17<18:59:22, 3.57it/s] {'loss': 3.1647, 'learning_rate': 6.919455278071063e-07, 'epoch': 5.48} + 34%|███▍ | 127280/371472 [10:07:17<18:59:22, 3.57it/s] 34%|███▍ | 127281/371472 [10:07:17<19:29:20, 3.48it/s] 34%|███▍ | 127282/371472 [10:07:17<19:44:01, 3.44it/s] 34%|███▍ | 127283/371472 [10:07:17<18:25:48, 3.68it/s] 34%|███▍ | 127284/371472 [10:07:18<18:08:54, 3.74it/s] 34%|███▍ | 127285/371472 [10:07:18<18:04:02, 3.75it/s] 34%|███▍ | 127286/371472 [10:07:18<18:31:58, 3.66it/s] 34%|███▍ | 127287/371472 [10:07:18<19:00:20, 3.57it/s] 34%|███▍ | 127288/371472 [10:07:19<18:44:58, 3.62it/s] 34%|███▍ | 127289/371472 [10:07:19<18:50:44, 3.60it/s] 34%|███▍ | 127290/371472 [10:07:19<18:29:34, 3.67it/s] 34%|███▍ | 127291/371472 [10:07:20<19:38:58, 3.45it/s] 34%|███▍ | 127292/371472 [10:07:20<19:48:30, 3.42it/s] 34%|███▍ | 127293/371472 [10:07:20<19:12:14, 3.53it/s] 34%|███▍ | 127294/371472 [10:07:20<18:52:46, 3.59it/s] 34%|███▍ | 127295/371472 [10:07:21<19:24:22, 3.50it/s] 34%|███▍ | 127296/371472 [10:07:21<20:18:10, 3.34it/s] 34%|███▍ | 127297/371472 [10:07:21<19:30:27, 3.48it/s] 34%|███▍ | 127298/371472 [10:07:22<20:16:17, 3.35it/s] 34%|███▍ | 127299/371472 [10:07:22<19:10:09, 3.54it/s] 34%|███▍ | 127300/371472 [10:07:22<19:22:08, 3.50it/s] {'loss': 3.3421, 'learning_rate': 6.918970458316274e-07, 'epoch': 5.48} + 34%|███▍ | 127300/371472 [10:07:22<19:22:08, 3.50it/s] 34%|███▍ | 127301/371472 [10:07:22<18:47:31, 3.61it/s] 34%|███▍ | 127302/371472 [10:07:23<18:47:18, 3.61it/s] 34%|███▍ | 127303/371472 [10:07:23<20:16:45, 3.34it/s] 34%|███▍ | 127304/371472 [10:07:23<19:22:41, 3.50it/s] 34%|███▍ | 127305/371472 [10:07:24<18:52:19, 3.59it/s] 34%|███▍ | 127306/371472 [10:07:24<18:26:43, 3.68it/s] 34%|███▍ | 127307/371472 [10:07:24<19:23:53, 3.50it/s] 34%|███▍ | 127308/371472 [10:07:24<18:26:17, 3.68it/s] 34%|███▍ | 127309/371472 [10:07:25<18:05:11, 3.75it/s] 34%|███▍ | 127310/371472 [10:07:25<18:34:57, 3.65it/s] 34%|███▍ | 127311/371472 [10:07:25<18:16:22, 3.71it/s] 34%|███▍ | 127312/371472 [10:07:26<19:09:20, 3.54it/s] 34%|███▍ | 127313/371472 [10:07:26<19:45:47, 3.43it/s] 34%|███▍ | 127314/371472 [10:07:26<19:17:14, 3.52it/s] 34%|███▍ | 127315/371472 [10:07:26<19:22:03, 3.50it/s] 34%|███▍ | 127316/371472 [10:07:27<18:34:21, 3.65it/s] 34%|███▍ | 127317/371472 [10:07:27<19:26:24, 3.49it/s] 34%|███▍ | 127318/371472 [10:07:27<19:28:46, 3.48it/s] 34%|███▍ | 127319/371472 [10:07:28<19:23:18, 3.50it/s] 34%|███▍ | 127320/371472 [10:07:28<18:45:12, 3.62it/s] {'loss': 3.4923, 'learning_rate': 6.918485638561486e-07, 'epoch': 5.48} + 34%|███▍ | 127320/371472 [10:07:28<18:45:12, 3.62it/s] 34%|███▍ | 127321/371472 [10:07:28<18:08:25, 3.74it/s] 34%|███▍ | 127322/371472 [10:07:28<19:03:15, 3.56it/s] 34%|███▍ | 127323/371472 [10:07:29<19:06:56, 3.55it/s] 34%|███▍ | 127324/371472 [10:07:29<21:18:03, 3.18it/s] 34%|███▍ | 127325/371472 [10:07:29<20:27:58, 3.31it/s] 34%|███▍ | 127326/371472 [10:07:30<19:24:42, 3.49it/s] 34%|███▍ | 127327/371472 [10:07:30<18:37:22, 3.64it/s] 34%|███▍ | 127328/371472 [10:07:30<18:43:52, 3.62it/s] 34%|███▍ | 127329/371472 [10:07:30<19:18:15, 3.51it/s] 34%|███▍ | 127330/371472 [10:07:31<18:45:30, 3.62it/s] 34%|███▍ | 127331/371472 [10:07:31<18:41:04, 3.63it/s] 34%|███▍ | 127332/371472 [10:07:31<18:43:17, 3.62it/s] 34%|███▍ | 127333/371472 [10:07:31<18:17:46, 3.71it/s] 34%|███▍ | 127334/371472 [10:07:32<18:43:52, 3.62it/s] 34%|███▍ | 127335/371472 [10:07:32<18:41:24, 3.63it/s] 34%|███▍ | 127336/371472 [10:07:32<18:35:59, 3.65it/s] 34%|███▍ | 127337/371472 [10:07:33<18:38:33, 3.64it/s] 34%|███▍ | 127338/371472 [10:07:33<18:33:41, 3.65it/s] 34%|███▍ | 127339/371472 [10:07:33<18:37:17, 3.64it/s] 34%|███▍ | 127340/371472 [10:07:33<18:06:07, 3.75it/s] {'loss': 3.2637, 'learning_rate': 6.918000818806698e-07, 'epoch': 5.48} + 34%|███▍ | 127340/371472 [10:07:33<18:06:07, 3.75it/s] 34%|███▍ | 127341/371472 [10:07:34<17:52:08, 3.80it/s] 34%|███▍ | 127342/371472 [10:07:34<18:09:19, 3.74it/s] 34%|███▍ | 127343/371472 [10:07:34<19:31:10, 3.47it/s] 34%|███▍ | 127344/371472 [10:07:34<19:04:40, 3.55it/s] 34%|███▍ | 127345/371472 [10:07:35<19:36:57, 3.46it/s] 34%|███▍ | 127346/371472 [10:07:35<22:19:58, 3.04it/s] 34%|███▍ | 127347/371472 [10:07:35<20:45:05, 3.27it/s] 34%|███▍ | 127348/371472 [10:07:36<19:54:38, 3.41it/s] 34%|███▍ | 127349/371472 [10:07:36<19:18:59, 3.51it/s] 34%|███▍ | 127350/371472 [10:07:36<19:39:24, 3.45it/s] 34%|███▍ | 127351/371472 [10:07:37<18:55:16, 3.58it/s] 34%|███▍ | 127352/371472 [10:07:37<18:15:55, 3.71it/s] 34%|███▍ | 127353/371472 [10:07:37<19:31:11, 3.47it/s] 34%|███▍ | 127354/371472 [10:07:37<21:05:19, 3.22it/s] 34%|███▍ | 127355/371472 [10:07:38<20:01:49, 3.39it/s] 34%|███▍ | 127356/371472 [10:07:38<19:03:42, 3.56it/s] 34%|███▍ | 127357/371472 [10:07:38<19:21:14, 3.50it/s] 34%|███▍ | 127358/371472 [10:07:39<19:23:07, 3.50it/s] 34%|███▍ | 127359/371472 [10:07:39<22:33:04, 3.01it/s] 34%|███▍ | 127360/371472 [10:07:39<21:16:09, 3.19it/s] {'loss': 3.0587, 'learning_rate': 6.917515999051908e-07, 'epoch': 5.49} + 34%|███▍ | 127360/371472 [10:07:39<21:16:09, 3.19it/s] 34%|███▍ | 127361/371472 [10:07:40<20:36:36, 3.29it/s] 34%|███▍ | 127362/371472 [10:07:40<19:12:27, 3.53it/s] 34%|███▍ | 127363/371472 [10:07:40<19:28:39, 3.48it/s] 34%|███▍ | 127364/371472 [10:07:40<19:13:31, 3.53it/s] 34%|███▍ | 127365/371472 [10:07:41<18:46:30, 3.61it/s] 34%|███▍ | 127366/371472 [10:07:41<20:19:52, 3.34it/s] 34%|███▍ | 127367/371472 [10:07:41<19:12:45, 3.53it/s] 34%|███▍ | 127368/371472 [10:07:42<20:20:10, 3.33it/s] 34%|███▍ | 127369/371472 [10:07:42<19:28:42, 3.48it/s] 34%|███▍ | 127370/371472 [10:07:42<19:20:57, 3.50it/s] 34%|███▍ | 127371/371472 [10:07:42<19:08:38, 3.54it/s] 34%|██���▍ | 127372/371472 [10:07:43<20:32:29, 3.30it/s] 34%|███▍ | 127373/371472 [10:07:43<19:28:42, 3.48it/s] 34%|███▍ | 127374/371472 [10:07:43<18:36:48, 3.64it/s] 34%|███▍ | 127375/371472 [10:07:44<19:11:43, 3.53it/s] 34%|███▍ | 127376/371472 [10:07:44<19:45:09, 3.43it/s] 34%|███▍ | 127377/371472 [10:07:44<19:04:42, 3.55it/s] 34%|███▍ | 127378/371472 [10:07:44<18:20:59, 3.70it/s] 34%|███▍ | 127379/371472 [10:07:45<18:01:40, 3.76it/s] 34%|███▍ | 127380/371472 [10:07:45<18:39:58, 3.63it/s] {'loss': 3.1862, 'learning_rate': 6.917031179297118e-07, 'epoch': 5.49} + 34%|███▍ | 127380/371472 [10:07:45<18:39:58, 3.63it/s] 34%|███▍ | 127381/371472 [10:07:45<19:49:20, 3.42it/s] 34%|███▍ | 127382/371472 [10:07:45<18:53:49, 3.59it/s] 34%|███▍ | 127383/371472 [10:07:46<19:18:25, 3.51it/s] 34%|███▍ | 127384/371472 [10:07:46<18:56:37, 3.58it/s] 34%|███▍ | 127385/371472 [10:07:46<18:33:33, 3.65it/s] 34%|███▍ | 127386/371472 [10:07:47<19:58:40, 3.39it/s] 34%|███▍ | 127387/371472 [10:07:47<19:23:55, 3.50it/s] 34%|███▍ | 127388/371472 [10:07:47<19:35:56, 3.46it/s] 34%|███▍ | 127389/371472 [10:07:47<19:06:29, 3.55it/s] 34%|███▍ | 127390/371472 [10:07:48<18:56:03, 3.58it/s] 34%|███▍ | 127391/371472 [10:07:48<19:28:25, 3.48it/s] 34%|███▍ | 127392/371472 [10:07:48<19:20:08, 3.51it/s] 34%|███▍ | 127393/371472 [10:07:49<18:43:25, 3.62it/s] 34%|███▍ | 127394/371472 [10:07:49<19:14:50, 3.52it/s] 34%|███▍ | 127395/371472 [10:07:49<19:12:19, 3.53it/s] 34%|███▍ | 127396/371472 [10:07:49<18:25:43, 3.68it/s] 34%|███▍ | 127397/371472 [10:07:50<17:58:44, 3.77it/s] 34%|███▍ | 127398/371472 [10:07:50<18:15:12, 3.71it/s] 34%|███▍ | 127399/371472 [10:07:50<19:17:45, 3.51it/s] 34%|███▍ | 127400/371472 [10:07:51<18:54:43, 3.58it/s] {'loss': 3.1303, 'learning_rate': 6.91654635954233e-07, 'epoch': 5.49} + 34%|███▍ | 127400/371472 [10:07:51<18:54:43, 3.58it/s] 34%|███▍ | 127401/371472 [10:07:51<18:29:24, 3.67it/s] 34%|███▍ | 127402/371472 [10:07:51<17:57:54, 3.77it/s] 34%|███▍ | 127403/371472 [10:07:51<18:41:39, 3.63it/s] 34%|███▍ | 127404/371472 [10:07:52<21:16:27, 3.19it/s] 34%|███▍ | 127405/371472 [10:07:52<19:56:48, 3.40it/s] 34%|███▍ | 127406/371472 [10:07:52<19:17:12, 3.52it/s] 34%|███▍ | 127407/371472 [10:07:53<19:58:39, 3.39it/s] 34%|███▍ | 127408/371472 [10:07:53<18:46:18, 3.61it/s] 34%|███▍ | 127409/371472 [10:07:53<18:44:53, 3.62it/s] 34%|███▍ | 127410/371472 [10:07:53<18:01:26, 3.76it/s] 34%|███▍ | 127411/371472 [10:07:54<18:01:22, 3.76it/s] 34%|███▍ | 127412/371472 [10:07:54<18:19:10, 3.70it/s] 34%|███▍ | 127413/371472 [10:07:54<19:28:24, 3.48it/s] 34%|███▍ | 127414/371472 [10:07:54<18:32:43, 3.66it/s] 34%|███▍ | 127415/371472 [10:07:55<17:56:50, 3.78it/s] 34%|███▍ | 127416/371472 [10:07:55<18:47:22, 3.61it/s] 34%|███▍ | 127417/371472 [10:07:55<18:48:13, 3.61it/s] 34%|███▍ | 127418/371472 [10:07:56<18:32:40, 3.66it/s] 34%|███▍ | 127419/371472 [10:07:56<18:09:22, 3.73it/s] 34%|███▍ | 127420/371472 [10:07:56<19:31:58, 3.47it/s] {'loss': 3.2572, 'learning_rate': 6.916061539787541e-07, 'epoch': 5.49} + 34%|███▍ | 127420/371472 [10:07:56<19:31:58, 3.47it/s] 34%|███▍ | 127421/371472 [10:07:56<19:01:39, 3.56it/s] 34%|███▍ | 127422/371472 [10:07:57<19:47:31, 3.43it/s] 34%|███▍ | 127423/371472 [10:07:57<19:35:48, 3.46it/s] 34%|███▍ | 127424/371472 [10:07:57<19:02:25, 3.56it/s] 34%|███▍ | 127425/371472 [10:07:58<20:25:18, 3.32it/s] 34%|███▍ | 127426/371472 [10:07:58<18:59:57, 3.57it/s] 34%|███▍ | 127427/371472 [10:07:58<19:38:56, 3.45it/s] 34%|███▍ | 127428/371472 [10:07:58<19:18:29, 3.51it/s] 34%|███▍ | 127429/371472 [10:07:59<19:10:08, 3.54it/s] 34%|███▍ | 127430/371472 [10:07:59<20:07:16, 3.37it/s] 34%|███▍ | 127431/371472 [10:07:59<19:14:49, 3.52it/s] 34%|███▍ | 127432/371472 [10:08:00<18:39:12, 3.63it/s] 34%|███▍ | 127433/371472 [10:08:00<18:21:20, 3.69it/s] 34%|███▍ | 127434/371472 [10:08:00<18:00:05, 3.77it/s] 34%|███▍ | 127435/371472 [10:08:00<19:17:51, 3.51it/s] 34%|███▍ | 127436/371472 [10:08:01<19:50:51, 3.42it/s] 34%|███▍ | 127437/371472 [10:08:01<20:22:32, 3.33it/s] 34%|███▍ | 127438/371472 [10:08:01<21:48:19, 3.11it/s] 34%|███▍ | 127439/371472 [10:08:02<20:54:34, 3.24it/s] 34%|███▍ | 127440/371472 [10:08:02<19:52:42, 3.41it/s] {'loss': 3.1622, 'learning_rate': 6.915576720032752e-07, 'epoch': 5.49} + 34%|███▍ | 127440/371472 [10:08:02<19:52:42, 3.41it/s] 34%|███▍ | 127441/371472 [10:08:02<18:52:32, 3.59it/s] 34%|███▍ | 127442/371472 [10:08:03<20:30:42, 3.30it/s] 34%|███▍ | 127443/371472 [10:08:03<19:51:49, 3.41it/s] 34%|███▍ | 127444/371472 [10:08:03<19:26:08, 3.49it/s] 34%|███▍ | 127445/371472 [10:08:03<18:54:37, 3.58it/s] 34%|███▍ | 127446/371472 [10:08:04<18:55:58, 3.58it/s] 34%|███▍ | 127447/371472 [10:08:04<18:35:26, 3.65it/s] 34%|███▍ | 127448/371472 [10:08:04<17:48:57, 3.80it/s] 34%|███▍ | 127449/371472 [10:08:04<17:40:05, 3.84it/s] 34%|███▍ | 127450/371472 [10:08:05<17:25:45, 3.89it/s] 34%|███▍ | 127451/371472 [10:08:05<18:12:52, 3.72it/s] 34%|███▍ | 127452/371472 [10:08:05<18:20:29, 3.70it/s] 34%|███▍ | 127453/371472 [10:08:05<19:14:43, 3.52it/s] 34%|███▍ | 127454/371472 [10:08:06<19:27:31, 3.48it/s] 34%|███▍ | 127455/371472 [10:08:06<19:42:12, 3.44it/s] 34%|███▍ | 127456/371472 [10:08:06<18:51:39, 3.59it/s] 34%|███▍ | 127457/371472 [10:08:07<19:54:24, 3.40it/s] 34%|███▍ | 127458/371472 [10:08:07<18:56:09, 3.58it/s] 34%|███▍ | 127459/371472 [10:08:07<18:50:51, 3.60it/s] 34%|███▍ | 127460/371472 [10:08:07<19:16:21, 3.52it/s] {'loss': 3.2954, 'learning_rate': 6.915091900277963e-07, 'epoch': 5.49} + 34%|███▍ | 127460/371472 [10:08:07<19:16:21, 3.52it/s] 34%|███▍ | 127461/371472 [10:08:08<19:21:05, 3.50it/s] 34%|███▍ | 127462/371472 [10:08:08<18:43:05, 3.62it/s] 34%|███▍ | 127463/371472 [10:08:08<18:52:23, 3.59it/s] 34%|███▍ | 127464/371472 [10:08:09<18:31:34, 3.66it/s] 34%|███▍ | 127465/371472 [10:08:09<18:39:47, 3.63it/s] 34%|███▍ | 127466/371472 [10:08:09<18:58:14, 3.57it/s] 34%|███▍ | 127467/371472 [10:08:09<18:24:54, 3.68it/s] 34%|███▍ | 127468/371472 [10:08:10<19:42:00, 3.44it/s] 34%|███▍ | 127469/371472 [10:08:10<19:46:10, 3.43it/s] 34%|███▍ | 127470/371472 [10:08:10<19:13:51, 3.52it/s] 34%|███▍ | 127471/371472 [10:08:11<18:59:34, 3.57it/s] 34%|███▍ | 127472/371472 [10:08:11<18:06:26, 3.74it/s] 34%|███▍ | 127473/371472 [10:08:11<18:37:10, 3.64it/s] 34%|███▍ | 127474/371472 [10:08:11<19:35:19, 3.46it/s] 34%|███▍ | 127475/371472 [10:08:12<20:06:52, 3.37it/s] 34%|███▍ | 127476/371472 [10:08:12<19:24:55, 3.49it/s] 34%|███▍ | 127477/371472 [10:08:12<19:20:30, 3.50it/s] 34%|███▍ | 127478/371472 [10:08:13<19:05:02, 3.55it/s] 34%|███▍ | 127479/371472 [10:08:13<19:05:38, 3.55it/s] 34%|███▍ | 127480/371472 [10:08:13<19:04:45, 3.55it/s] {'loss': 3.207, 'learning_rate': 6.914607080523175e-07, 'epoch': 5.49} + 34%|███▍ | 127480/371472 [10:08:13<19:04:45, 3.55it/s] 34%|███▍ | 127481/371472 [10:08:13<19:40:52, 3.44it/s] 34%|███▍ | 127482/371472 [10:08:14<19:52:56, 3.41it/s] 34%|███▍ | 127483/371472 [10:08:14<19:33:50, 3.46it/s] 34%|███▍ | 127484/371472 [10:08:14<18:46:21, 3.61it/s] 34%|███▍ | 127485/371472 [10:08:15<19:25:04, 3.49it/s] 34%|███▍ | 127486/371472 [10:08:15<20:03:17, 3.38it/s] 34%|███▍ | 127487/371472 [10:08:15<21:44:17, 3.12it/s] 34%|███▍ | 127488/371472 [10:08:16<20:35:37, 3.29it/s] 34%|███▍ | 127489/371472 [10:08:16<20:43:08, 3.27it/s] 34%|███▍ | 127490/371472 [10:08:16<21:40:12, 3.13it/s] 34%|███▍ | 127491/371472 [10:08:16<21:01:40, 3.22it/s] 34%|███▍ | 127492/371472 [10:08:17<19:52:42, 3.41it/s] 34%|███▍ | 127493/371472 [10:08:17<19:01:58, 3.56it/s] 34%|███▍ | 127494/371472 [10:08:17<19:57:27, 3.40it/s] 34%|███▍ | 127495/371472 [10:08:18<21:02:43, 3.22it/s] 34%|███▍ | 127496/371472 [10:08:18<20:12:53, 3.35it/s] 34%|███▍ | 127497/371472 [10:08:18<20:56:50, 3.24it/s] 34%|███▍ | 127498/371472 [10:08:19<20:15:53, 3.34it/s] 34%|███▍ | 127499/371472 [10:08:19<20:33:40, 3.30it/s] 34%|███▍ | 127500/371472 [10:08:19<19:48:00, 3.42it/s] {'loss': 3.2483, 'learning_rate': 6.914122260768385e-07, 'epoch': 5.49} + 34%|███▍ | 127500/371472 [10:08:19<19:48:00, 3.42it/s] 34%|███▍ | 127501/371472 [10:08:19<18:49:49, 3.60it/s] 34%|███▍ | 127502/371472 [10:08:20<18:52:54, 3.59it/s] 34%|███▍ | 127503/371472 [10:08:20<18:10:48, 3.73it/s] 34%|███▍ | 127504/371472 [10:08:20<18:07:53, 3.74it/s] 34%|███▍ | 127505/371472 [10:08:20<18:18:10, 3.70it/s] 34%|███▍ | 127506/371472 [10:08:21<19:27:21, 3.48it/s] 34%|███▍ | 127507/371472 [10:08:21<20:10:08, 3.36it/s] 34%|███▍ | 127508/371472 [10:08:21<20:26:05, 3.32it/s] 34%|███▍ | 127509/371472 [10:08:22<19:09:32, 3.54it/s] 34%|███▍ | 127510/371472 [10:08:22<19:00:00, 3.57it/s] 34%|███▍ | 127511/371472 [10:08:22<18:21:15, 3.69it/s] 34%|███▍ | 127512/371472 [10:08:22<18:28:53, 3.67it/s] 34%|███▍ | 127513/371472 [10:08:23<19:19:53, 3.51it/s] 34%|███▍ | 127514/371472 [10:08:23<19:39:44, 3.45it/s] 34%|███▍ | 127515/371472 [10:08:23<20:03:18, 3.38it/s] 34%|███▍ | 127516/371472 [10:08:24<20:25:47, 3.32it/s] 34%|███▍ | 127517/371472 [10:08:24<19:37:10, 3.45it/s] 34%|███▍ | 127518/371472 [10:08:24<19:33:45, 3.46it/s] 34%|███▍ | 127519/371472 [10:08:24<18:55:37, 3.58it/s] 34%|███▍ | 127520/371472 [10:08:25<19:11:05, 3.53it/s] {'loss': 3.2078, 'learning_rate': 6.913637441013596e-07, 'epoch': 5.49} + 34%|███▍ | 127520/371472 [10:08:25<19:11:05, 3.53it/s] 34%|███▍ | 127521/371472 [10:08:25<20:04:07, 3.38it/s] 34%|███▍ | 127522/371472 [10:08:25<19:35:19, 3.46it/s] 34%|███▍ | 127523/371472 [10:08:26<18:36:26, 3.64it/s] 34%|███▍ | 127524/371472 [10:08:26<18:32:34, 3.65it/s] 34%|███▍ | 127525/371472 [10:08:26<18:24:59, 3.68it/s] 34%|███▍ | 127526/371472 [10:08:26<18:09:35, 3.73it/s] 34%|███▍ | 127527/371472 [10:08:27<17:56:14, 3.78it/s] 34%|███▍ | 127528/371472 [10:08:27<17:18:42, 3.91it/s] 34%|███▍ | 127529/371472 [10:08:27<17:33:51, 3.86it/s] 34%|███▍ | 127530/371472 [10:08:27<17:42:42, 3.83it/s] 34%|███▍ | 127531/371472 [10:08:28<18:34:21, 3.65it/s] 34%|███▍ | 127532/371472 [10:08:28<20:05:16, 3.37it/s] 34%|███▍ | 127533/371472 [10:08:28<19:25:36, 3.49it/s] 34%|███▍ | 127534/371472 [10:08:29<19:20:27, 3.50it/s] 34%|███▍ | 127535/371472 [10:08:29<18:47:04, 3.61it/s] 34%|███▍ | 127536/371472 [10:08:29<18:21:17, 3.69it/s] 34%|███▍ | 127537/371472 [10:08:29<18:40:37, 3.63it/s] 34%|███▍ | 127538/371472 [10:08:30<18:38:50, 3.63it/s] 34%|███▍ | 127539/371472 [10:08:30<19:49:39, 3.42it/s] 34%|███▍ | 127540/371472 [10:08:30<20:09:58, 3.36it/s] {'loss': 3.1427, 'learning_rate': 6.913152621258807e-07, 'epoch': 5.49} + 34%|███▍ | 127540/371472 [10:08:30<20:09:58, 3.36it/s] 34%|███▍ | 127541/371472 [10:08:31<19:45:42, 3.43it/s] 34%|███▍ | 127542/371472 [10:08:31<19:01:12, 3.56it/s] 34%|███▍ | 127543/371472 [10:08:31<18:50:26, 3.60it/s] 34%|███▍ | 127544/371472 [10:08:31<18:22:44, 3.69it/s] 34%|███▍ | 127545/371472 [10:08:32<18:08:22, 3.74it/s] 34%|███▍ | 127546/371472 [10:08:32<18:10:37, 3.73it/s] 34%|███▍ | 127547/371472 [10:08:32<18:29:10, 3.67it/s] 34%|███▍ | 127548/371472 [10:08:32<18:44:56, 3.61it/s] 34%|███▍ | 127549/371472 [10:08:33<18:19:58, 3.70it/s] 34%|███▍ | 127550/371472 [10:08:33<18:30:52, 3.66it/s] 34%|███▍ | 127551/371472 [10:08:33<18:47:29, 3.61it/s] 34%|███▍ | 127552/371472 [10:08:34<18:23:01, 3.69it/s] 34%|███▍ | 127553/371472 [10:08:34<18:40:11, 3.63it/s] 34%|███▍ | 127554/371472 [10:08:34<18:34:25, 3.65it/s] 34%|███▍ | 127555/371472 [10:08:34<18:52:00, 3.59it/s] 34%|███▍ | 127556/371472 [10:08:35<18:52:50, 3.59it/s] 34%|███▍ | 127557/371472 [10:08:35<18:39:20, 3.63it/s] 34%|███▍ | 127558/371472 [10:08:35<18:15:46, 3.71it/s] 34%|███▍ | 127559/371472 [10:08:35<17:35:59, 3.85it/s] 34%|███▍ | 127560/371472 [10:08:36<17:34:40, 3.85it/s] {'loss': 3.2661, 'learning_rate': 6.912667801504019e-07, 'epoch': 5.49} + 34%|███▍ | 127560/371472 [10:08:36<17:34:40, 3.85it/s] 34%|███▍ | 127561/371472 [10:08:36<17:36:30, 3.85it/s] 34%|███▍ | 127562/371472 [10:08:36<17:28:11, 3.88it/s] 34%|███▍ | 127563/371472 [10:08:36<17:34:43, 3.85it/s] 34%|███▍ | 127564/371472 [10:08:37<16:58:56, 3.99it/s] 34%|███▍ | 127565/371472 [10:08:37<18:28:31, 3.67it/s] 34%|███▍ | 127566/371472 [10:08:37<17:42:36, 3.83it/s] 34%|███▍ | 127567/371472 [10:08:38<17:51:47, 3.79it/s] 34%|███▍ | 127568/371472 [10:08:38<17:25:51, 3.89it/s] 34%|███▍ | 127569/371472 [10:08:38<18:21:17, 3.69it/s] 34%|███▍ | 127570/371472 [10:08:38<18:27:24, 3.67it/s] 34%|███▍ | 127571/371472 [10:08:39<18:02:25, 3.76it/s] 34%|███▍ | 127572/371472 [10:08:39<19:49:04, 3.42it/s] 34%|███▍ | 127573/371472 [10:08:39<19:55:05, 3.40it/s] 34%|███▍ | 127574/371472 [10:08:40<19:14:00, 3.52it/s] 34%|███▍ | 127575/371472 [10:08:40<19:10:25, 3.53it/s] 34%|███▍ | 127576/371472 [10:08:40<18:29:27, 3.66it/s] 34%|███▍ | 127577/371472 [10:08:40<17:59:37, 3.77it/s] 34%|███▍ | 127578/371472 [10:08:41<17:47:17, 3.81it/s] 34%|███▍ | 127579/371472 [10:08:41<18:39:32, 3.63it/s] 34%|███▍ | 127580/371472 [10:08:41<18:32:50, 3.65it/s] {'loss': 3.1361, 'learning_rate': 6.91218298174923e-07, 'epoch': 5.5} + 34%|███▍ | 127580/371472 [10:08:41<18:32:50, 3.65it/s] 34%|███▍ | 127581/371472 [10:08:41<19:14:35, 3.52it/s] 34%|███▍ | 127582/371472 [10:08:42<19:32:53, 3.47it/s] 34%|███▍ | 127583/371472 [10:08:42<19:27:02, 3.48it/s] 34%|███▍ | 127584/371472 [10:08:42<19:23:08, 3.49it/s] 34%|███▍ | 127585/371472 [10:08:43<19:40:52, 3.44it/s] 34%|███▍ | 127586/371472 [10:08:43<18:44:06, 3.62it/s] 34%|███▍ | 127587/371472 [10:08:43<18:35:24, 3.64it/s] 34%|███▍ | 127588/371472 [10:08:43<19:00:54, 3.56it/s] 34%|███▍ | 127589/371472 [10:08:44<18:14:20, 3.71it/s] 34%|███▍ | 127590/371472 [10:08:44<18:47:38, 3.60it/s] 34%|███▍ | 127591/371472 [10:08:44<18:49:18, 3.60it/s] 34%|███▍ | 127592/371472 [10:08:45<19:18:18, 3.51it/s] 34%|███▍ | 127593/371472 [10:08:45<18:30:29, 3.66it/s] 34%|███▍ | 127594/371472 [10:08:45<18:39:26, 3.63it/s] 34%|███▍ | 127595/371472 [10:08:45<18:48:44, 3.60it/s] 34%|███▍ | 127596/371472 [10:08:46<20:07:31, 3.37it/s] 34%|███▍ | 127597/371472 [10:08:46<18:58:43, 3.57it/s] 34%|███▍ | 127598/371472 [10:08:46<18:44:09, 3.62it/s] 34%|███▍ | 127599/371472 [10:08:46<18:34:55, 3.65it/s] 34%|███▍ | 127600/371472 [10:08:47<18:35:18, 3.64it/s] {'loss': 3.3142, 'learning_rate': 6.911698161994441e-07, 'epoch': 5.5} + 34%|███▍ | 127600/371472 [10:08:47<18:35:18, 3.64it/s] 34%|███▍ | 127601/371472 [10:08:47<19:48:15, 3.42it/s] 34%|███▍ | 127602/371472 [10:08:47<19:22:49, 3.50it/s] 34%|███▍ | 127603/371472 [10:08:48<19:35:32, 3.46it/s] 34%|███▍ | 127604/371472 [10:08:48<19:07:21, 3.54it/s] 34%|███▍ | 127605/371472 [10:08:48<19:12:36, 3.53it/s] 34%|███▍ | 127606/371472 [10:08:49<21:09:44, 3.20it/s] 34%|███▍ | 127607/371472 [10:08:49<20:50:11, 3.25it/s] 34%|███▍ | 127608/371472 [10:08:49<20:35:09, 3.29it/s] 34%|███▍ | 127609/371472 [10:08:49<19:35:44, 3.46it/s] 34%|███▍ | 127610/371472 [10:08:50<19:26:25, 3.48it/s] 34%|███▍ | 127611/371472 [10:08:50<18:29:04, 3.66it/s] 34%|███▍ | 127612/371472 [10:08:50<19:51:38, 3.41it/s] 34%|███▍ | 127613/371472 [10:08:51<19:34:07, 3.46it/s] 34%|███▍ | 127614/371472 [10:08:51<18:59:22, 3.57it/s] 34%|███▍ | 127615/371472 [10:08:51<18:52:11, 3.59it/s] 34%|███▍ | 127616/371472 [10:08:51<19:01:58, 3.56it/s] 34%|███▍ | 127617/371472 [10:08:52<20:09:40, 3.36it/s] 34%|███▍ | 127618/371472 [10:08:52<19:27:20, 3.48it/s] 34%|███▍ | 127619/371472 [10:08:52<19:39:22, 3.45it/s] 34%|███▍ | 127620/371472 [10:08:53<19:04:16, 3.55it/s] {'loss': 3.0769, 'learning_rate': 6.911213342239651e-07, 'epoch': 5.5} + 34%|███▍ | 127620/371472 [10:08:53<19:04:16, 3.55it/s] 34%|███▍ | 127621/371472 [10:08:53<20:22:29, 3.32it/s] 34%|███▍ | 127622/371472 [10:08:53<19:51:26, 3.41it/s] 34%|███▍ | 127623/371472 [10:08:53<19:15:11, 3.52it/s] 34%|███▍ | 127624/371472 [10:08:54<18:14:20, 3.71it/s] 34%|███▍ | 127625/371472 [10:08:54<18:40:58, 3.63it/s] 34%|███▍ | 127626/371472 [10:08:54<18:44:57, 3.61it/s] 34%|███▍ | 127627/371472 [10:08:54<18:34:42, 3.65it/s] 34%|███▍ | 127628/371472 [10:08:55<20:42:33, 3.27it/s] 34%|███▍ | 127629/371472 [10:08:55<19:52:43, 3.41it/s] 34%|███▍ | 127630/371472 [10:08:55<19:42:59, 3.44it/s] 34%|███▍ | 127631/371472 [10:08:56<19:11:12, 3.53it/s] 34%|███▍ | 127632/371472 [10:08:56<18:13:15, 3.72it/s] 34%|███▍ | 127633/371472 [10:08:56<17:53:05, 3.79it/s] 34%|███▍ | 127634/371472 [10:08:57<20:09:39, 3.36it/s] 34%|███▍ | 127635/371472 [10:08:57<19:49:24, 3.42it/s] 34%|███▍ | 127636/371472 [10:08:57<20:05:02, 3.37it/s] 34%|███▍ | 127637/371472 [10:08:57<19:16:36, 3.51it/s] 34%|███▍ | 127638/371472 [10:08:58<20:49:23, 3.25it/s] 34%|███▍ | 127639/371472 [10:08:58<20:12:26, 3.35it/s] 34%|███▍ | 127640/371472 [10:08:58<19:54:12, 3.40it/s] {'loss': 3.3286, 'learning_rate': 6.910728522484862e-07, 'epoch': 5.5} + 34%|███▍ | 127640/371472 [10:08:58<19:54:12, 3.40it/s] 34%|███▍ | 127641/371472 [10:08:59<18:35:34, 3.64it/s] 34%|███▍ | 127642/371472 [10:08:59<18:38:34, 3.63it/s] 34%|███▍ | 127643/371472 [10:08:59<18:53:06, 3.59it/s] 34%|███▍ | 127644/371472 [10:08:59<19:04:27, 3.55it/s] 34%|███▍ | 127645/371472 [10:09:00<18:32:05, 3.65it/s] 34%|███▍ | 127646/371472 [10:09:00<18:58:08, 3.57it/s] 34%|███▍ | 127647/371472 [10:09:00<18:27:27, 3.67it/s] 34%|███▍ | 127648/371472 [10:09:00<18:08:37, 3.73it/s] 34%|███▍ | 127649/371472 [10:09:01<18:37:19, 3.64it/s] 34%|███▍ | 127650/371472 [10:09:01<17:45:04, 3.82it/s] 34%|███▍ | 127651/371472 [10:09:01<18:14:27, 3.71it/s] 34%|███▍ | 127652/371472 [10:09:02<19:28:49, 3.48it/s] 34%|███▍ | 127653/371472 [10:09:02<18:54:42, 3.58it/s] 34%|███▍ | 127654/371472 [10:09:02<18:13:47, 3.72it/s] 34%|███▍ | 127655/371472 [10:09:02<17:57:44, 3.77it/s] 34%|███▍ | 127656/371472 [10:09:03<18:09:33, 3.73it/s] 34%|███▍ | 127657/371472 [10:09:03<18:36:21, 3.64it/s] 34%|███▍ | 127658/371472 [10:09:03<17:59:58, 3.76it/s] 34%|███▍ | 127659/371472 [10:09:03<18:47:50, 3.60it/s] 34%|███▍ | 127660/371472 [10:09:04<20:02:13, 3.38it/s] {'loss': 3.2604, 'learning_rate': 6.910243702730073e-07, 'epoch': 5.5} + 34%|███▍ | 127660/371472 [10:09:04<20:02:13, 3.38it/s] 34%|███▍ | 127661/371472 [10:09:04<19:51:42, 3.41it/s] 34%|███▍ | 127662/371472 [10:09:04<19:46:48, 3.42it/s] 34%|███▍ | 127663/371472 [10:09:05<19:19:14, 3.51it/s] 34%|███▍ | 127664/371472 [10:09:05<20:21:25, 3.33it/s] 34%|███▍ | 127665/371472 [10:09:05<19:57:12, 3.39it/s] 34%|███▍ | 127666/371472 [10:09:06<19:17:46, 3.51it/s] 34%|███▍ | 127667/371472 [10:09:06<18:35:40, 3.64it/s] 34%|███▍ | 127668/371472 [10:09:06<18:15:23, 3.71it/s] 34%|███▍ | 127669/371472 [10:09:06<19:54:47, 3.40it/s] 34%|███▍ | 127670/371472 [10:09:07<19:10:34, 3.53it/s] 34%|███▍ | 127671/371472 [10:09:07<19:19:46, 3.50it/s] 34%|███▍ | 127672/371472 [10:09:07<18:41:00, 3.62it/s] 34%|███▍ | 127673/371472 [10:09:08<20:07:32, 3.36it/s] 34%|███▍ | 127674/371472 [10:09:08<20:15:09, 3.34it/s] 34%|███▍ | 127675/371472 [10:09:08<19:50:19, 3.41it/s] 34%|███▍ | 127676/371472 [10:09:08<20:41:05, 3.27it/s] 34%|███▍ | 127677/371472 [10:09:09<20:05:14, 3.37it/s] 34%|███▍ | 127678/371472 [10:09:09<19:04:06, 3.55it/s] 34%|███▍ | 127679/371472 [10:09:09<18:49:52, 3.60it/s] 34%|███▍ | 127680/371472 [10:09:10<18:10:15, 3.73it/s] {'loss': 3.188, 'learning_rate': 6.909758882975284e-07, 'epoch': 5.5} + 34%|███▍ | 127680/371472 [10:09:10<18:10:15, 3.73it/s] 34%|███▍ | 127681/371472 [10:09:10<19:21:32, 3.50it/s] 34%|███▍ | 127682/371472 [10:09:10<19:21:35, 3.50it/s] 34%|███▍ | 127683/371472 [10:09:10<19:44:19, 3.43it/s] 34%|███▍ | 127684/371472 [10:09:11<19:14:25, 3.52it/s] 34%|███▍ | 127685/371472 [10:09:11<18:34:16, 3.65it/s] 34%|███▍ | 127686/371472 [10:09:11<18:23:35, 3.68it/s] 34%|███▍ | 127687/371472 [10:09:11<17:41:06, 3.83it/s] 34%|███▍ | 127688/371472 [10:09:12<17:25:36, 3.89it/s] 34%|███▍ | 127689/371472 [10:09:12<17:39:38, 3.83it/s] 34%|███▍ | 127690/371472 [10:09:12<17:52:54, 3.79it/s] 34%|███▍ | 127691/371472 [10:09:13<18:24:48, 3.68it/s] 34%|███▍ | 127692/371472 [10:09:13<17:58:02, 3.77it/s] 34%|███▍ | 127693/371472 [10:09:13<19:42:37, 3.44it/s] 34%|███▍ | 127694/371472 [10:09:13<19:27:20, 3.48it/s] 34%|███▍ | 127695/371472 [10:09:14<20:31:33, 3.30it/s] 34%|███▍ | 127696/371472 [10:09:14<20:00:25, 3.38it/s] 34%|███▍ | 127697/371472 [10:09:14<19:05:30, 3.55it/s] 34%|███▍ | 127698/371472 [10:09:15<20:07:11, 3.37it/s] 34%|███▍ | 127699/371472 [10:09:15<20:22:08, 3.32it/s] 34%|███▍ | 127700/371472 [10:09:15<19:50:47, 3.41it/s] {'loss': 3.0901, 'learning_rate': 6.909274063220496e-07, 'epoch': 5.5} + 34%|███▍ | 127700/371472 [10:09:15<19:50:47, 3.41it/s] 34%|███▍ | 127701/371472 [10:09:15<18:58:49, 3.57it/s] 34%|███▍ | 127702/371472 [10:09:16<19:37:48, 3.45it/s] 34%|███▍ | 127703/371472 [10:09:16<19:50:38, 3.41it/s] 34%|███▍ | 127704/371472 [10:09:16<19:30:15, 3.47it/s] 34%|███▍ | 127705/371472 [10:09:17<19:06:52, 3.54it/s] 34%|███▍ | 127706/371472 [10:09:17<20:24:56, 3.32it/s] 34%|███▍ | 127707/371472 [10:09:17<21:49:50, 3.10it/s] 34%|███▍ | 127708/371472 [10:09:18<20:29:04, 3.31it/s] 34%|███▍ | 127709/371472 [10:09:18<21:23:07, 3.17it/s] 34%|███▍ | 127710/371472 [10:09:18<20:50:35, 3.25it/s] 34%|███▍ | 127711/371472 [10:09:19<20:47:46, 3.26it/s] 34%|███▍ | 127712/371472 [10:09:19<19:52:01, 3.41it/s] 34%|███▍ | 127713/371472 [10:09:19<19:14:56, 3.52it/s] 34%|███▍ | 127714/371472 [10:09:19<19:02:41, 3.56it/s] 34%|███▍ | 127715/371472 [10:09:20<20:09:14, 3.36it/s] 34%|███▍ | 127716/371472 [10:09:20<19:28:34, 3.48it/s] 34%|███▍ | 127717/371472 [10:09:20<18:44:31, 3.61it/s] 34%|███▍ | 127718/371472 [10:09:20<18:47:00, 3.60it/s] 34%|███▍ | 127719/371472 [10:09:21<18:33:19, 3.65it/s] 34%|███▍ | 127720/371472 [10:09:21<18:48:34, 3.60it/s] {'loss': 3.1472, 'learning_rate': 6.908789243465709e-07, 'epoch': 5.5} + 34%|███▍ | 127720/371472 [10:09:21<18:48:34, 3.60it/s] 34%|███▍ | 127721/371472 [10:09:21<19:05:45, 3.55it/s] 34%|███▍ | 127722/371472 [10:09:22<18:22:29, 3.68it/s] 34%|███▍ | 127723/371472 [10:09:22<18:41:25, 3.62it/s] 34%|███▍ | 127724/371472 [10:09:22<18:35:38, 3.64it/s] 34%|███▍ | 127725/371472 [10:09:22<18:49:50, 3.60it/s] 34%|███▍ | 127726/371472 [10:09:23<18:58:16, 3.57it/s] 34%|███▍ | 127727/371472 [10:09:23<19:08:03, 3.54it/s] 34%|███▍ | 127728/371472 [10:09:23<19:00:04, 3.56it/s] 34%|███▍ | 127729/371472 [10:09:24<18:52:33, 3.59it/s] 34%|███▍ | 127730/371472 [10:09:24<18:53:21, 3.58it/s] 34%|███▍ | 127731/371472 [10:09:24<18:41:16, 3.62it/s] 34%|███▍ | 127732/371472 [10:09:24<18:55:17, 3.58it/s] 34%|███▍ | 127733/371472 [10:09:25<18:29:33, 3.66it/s] 34%|███▍ | 127734/371472 [10:09:25<18:54:57, 3.58it/s] 34%|███▍ | 127735/371472 [10:09:25<19:15:47, 3.51it/s] 34%|███▍ | 127736/371472 [10:09:25<19:04:03, 3.55it/s] 34%|███▍ | 127737/371472 [10:09:26<18:22:23, 3.68it/s] 34%|███▍ | 127738/371472 [10:09:26<18:05:28, 3.74it/s] 34%|███▍ | 127739/371472 [10:09:26<19:26:25, 3.48it/s] 34%|███▍ | 127740/371472 [10:09:27<18:43:32, 3.62it/s] {'loss': 3.3285, 'learning_rate': 6.908304423710918e-07, 'epoch': 5.5} + 34%|███▍ | 127740/371472 [10:09:27<18:43:32, 3.62it/s] 34%|███▍ | 127741/371472 [10:09:27<18:33:21, 3.65it/s] 34%|███▍ | 127742/371472 [10:09:27<19:16:41, 3.51it/s] 34%|███▍ | 127743/371472 [10:09:27<18:44:34, 3.61it/s] 34%|███▍ | 127744/371472 [10:09:28<19:13:28, 3.52it/s] 34%|███▍ | 127745/371472 [10:09:28<19:12:08, 3.53it/s] 34%|███▍ | 127746/371472 [10:09:28<19:35:30, 3.46it/s] 34%|███▍ | 127747/371472 [10:09:29<18:24:10, 3.68it/s] 34%|███▍ | 127748/371472 [10:09:29<17:57:22, 3.77it/s] 34%|███▍ | 127749/371472 [10:09:29<17:32:51, 3.86it/s] 34%|███▍ | 127750/371472 [10:09:29<17:14:35, 3.93it/s] 34%|███▍ | 127751/371472 [10:09:30<17:19:03, 3.91it/s] 34%|███▍ | 127752/371472 [10:09:30<17:19:47, 3.91it/s] 34%|███▍ | 127753/371472 [10:09:30<17:26:22, 3.88it/s] 34%|███▍ | 127754/371472 [10:09:30<17:25:28, 3.89it/s] 34%|███▍ | 127755/371472 [10:09:31<17:56:39, 3.77it/s] 34%|███▍ | 127756/371472 [10:09:31<19:36:59, 3.45it/s] 34%|███▍ | 127757/371472 [10:09:31<19:04:23, 3.55it/s] 34%|███▍ | 127758/371472 [10:09:31<18:42:55, 3.62it/s] 34%|███▍ | 127759/371472 [10:09:32<18:05:30, 3.74it/s] 34%|███▍ | 127760/371472 [10:09:32<17:51:34, 3.79it/s] {'loss': 3.3031, 'learning_rate': 6.907819603956128e-07, 'epoch': 5.5} + 34%|███▍ | 127760/371472 [10:09:32<17:51:34, 3.79it/s] 34%|███▍ | 127761/371472 [10:09:32<17:27:57, 3.88it/s] 34%|███▍ | 127762/371472 [10:09:32<17:47:47, 3.80it/s] 34%|███▍ | 127763/371472 [10:09:33<17:46:35, 3.81it/s] 34%|███▍ | 127764/371472 [10:09:33<19:12:23, 3.52it/s] 34%|███▍ | 127765/371472 [10:09:33<18:19:10, 3.70it/s] 34%|███▍ | 127766/371472 [10:09:34<17:54:11, 3.78it/s] 34%|███▍ | 127767/371472 [10:09:34<17:47:09, 3.81it/s] 34%|███▍ | 127768/371472 [10:09:34<17:36:31, 3.84it/s] 34%|███▍ | 127769/371472 [10:09:34<17:24:11, 3.89it/s] 34%|███▍ | 127770/371472 [10:09:35<19:08:24, 3.54it/s] 34%|███▍ | 127771/371472 [10:09:35<18:32:43, 3.65it/s] 34%|███▍ | 127772/371472 [10:09:35<19:13:38, 3.52it/s] 34%|███▍ | 127773/371472 [10:09:35<19:17:16, 3.51it/s] 34%|███▍ | 127774/371472 [10:09:36<20:24:47, 3.32it/s] 34%|███▍ | 127775/371472 [10:09:36<19:35:07, 3.46it/s] 34%|███▍ | 127776/371472 [10:09:36<19:25:19, 3.49it/s] 34%|███▍ | 127777/371472 [10:09:37<18:32:25, 3.65it/s] 34%|███▍ | 127778/371472 [10:09:37<18:20:40, 3.69it/s] 34%|███▍ | 127779/371472 [10:09:37<18:21:30, 3.69it/s] 34%|███▍ | 127780/371472 [10:09:37<18:24:37, 3.68it/s] {'loss': 3.4402, 'learning_rate': 6.90733478420134e-07, 'epoch': 5.5} + 34%|███▍ | 127780/371472 [10:09:37<18:24:37, 3.68it/s] 34%|███▍ | 127781/371472 [10:09:38<17:35:40, 3.85it/s] 34%|███▍ | 127782/371472 [10:09:38<19:58:24, 3.39it/s] 34%|███▍ | 127783/371472 [10:09:38<20:41:22, 3.27it/s] 34%|███▍ | 127784/371472 [10:09:39<19:18:14, 3.51it/s] 34%|███▍ | 127785/371472 [10:09:39<18:46:36, 3.61it/s] 34%|███▍ | 127786/371472 [10:09:39<18:56:16, 3.57it/s] 34%|███▍ | 127787/371472 [10:09:39<18:21:31, 3.69it/s] 34%|███▍ | 127788/371472 [10:09:40<18:47:52, 3.60it/s] 34%|███▍ | 127789/371472 [10:09:40<18:37:12, 3.64it/s] 34%|███▍ | 127790/371472 [10:09:40<19:40:44, 3.44it/s] 34%|███▍ | 127791/371472 [10:09:41<20:34:39, 3.29it/s] 34%|███▍ | 127792/371472 [10:09:41<21:36:37, 3.13it/s] 34%|███▍ | 127793/371472 [10:09:41<20:10:33, 3.35it/s] 34%|███▍ | 127794/371472 [10:09:41<19:16:02, 3.51it/s] 34%|███▍ | 127795/371472 [10:09:42<18:22:56, 3.68it/s] 34%|███▍ | 127796/371472 [10:09:42<18:13:33, 3.71it/s] 34%|███▍ | 127797/371472 [10:09:42<17:46:12, 3.81it/s] 34%|███▍ | 127798/371472 [10:09:43<17:58:16, 3.77it/s] 34%|███▍ | 127799/371472 [10:09:43<18:26:46, 3.67it/s] 34%|███▍ | 127800/371472 [10:09:43<17:52:45, 3.79it/s] {'loss': 3.2753, 'learning_rate': 6.906849964446551e-07, 'epoch': 5.5} + 34%|███▍ | 127800/371472 [10:09:43<17:52:45, 3.79it/s] 34%|███▍ | 127801/371472 [10:09:43<18:35:02, 3.64it/s] 34%|███▍ | 127802/371472 [10:09:44<17:50:13, 3.79it/s] 34%|███▍ | 127803/371472 [10:09:44<18:04:20, 3.75it/s] 34%|███▍ | 127804/371472 [10:09:44<17:52:29, 3.79it/s] 34%|███▍ | 127805/371472 [10:09:44<18:05:09, 3.74it/s] 34%|███▍ | 127806/371472 [10:09:45<17:47:04, 3.81it/s] 34%|███▍ | 127807/371472 [10:09:45<17:50:19, 3.79it/s] 34%|███▍ | 127808/371472 [10:09:45<17:43:11, 3.82it/s] 34%|███▍ | 127809/371472 [10:09:45<19:18:27, 3.51it/s] 34%|███▍ | 127810/371472 [10:09:46<20:53:38, 3.24it/s] 34%|███▍ | 127811/371472 [10:09:46<21:42:45, 3.12it/s] 34%|███▍ | 127812/371472 [10:09:47<22:03:20, 3.07it/s] 34%|███▍ | 127813/371472 [10:09:47<20:17:40, 3.34it/s] 34%|███▍ | 127814/371472 [10:09:47<20:04:16, 3.37it/s] 34%|███▍ | 127815/371472 [10:09:47<19:02:53, 3.55it/s] 34%|███▍ | 127816/371472 [10:09:48<19:24:59, 3.49it/s] 34%|███▍ | 127817/371472 [10:09:48<18:57:23, 3.57it/s] 34%|███▍ | 127818/371472 [10:09:48<19:05:53, 3.54it/s] 34%|███▍ | 127819/371472 [10:09:48<18:26:13, 3.67it/s] 34%|███▍ | 127820/371472 [10:09:49<17:42:26, 3.82it/s] {'loss': 3.1753, 'learning_rate': 6.906365144691762e-07, 'epoch': 5.51} + 34%|███▍ | 127820/371472 [10:09:49<17:42:26, 3.82it/s] 34%|███▍ | 127821/371472 [10:09:49<17:25:33, 3.88it/s] 34%|███▍ | 127822/371472 [10:09:49<17:14:03, 3.93it/s] 34%|███▍ | 127823/371472 [10:09:49<17:11:41, 3.94it/s] 34%|███▍ | 127824/371472 [10:09:50<19:17:06, 3.51it/s] 34%|███▍ | 127825/371472 [10:09:50<18:57:55, 3.57it/s] 34%|███▍ | 127826/371472 [10:09:50<18:00:40, 3.76it/s] 34%|███▍ | 127827/371472 [10:09:51<17:51:43, 3.79it/s] 34%|███▍ | 127828/371472 [10:09:51<18:27:48, 3.67it/s] 34%|███▍ | 127829/371472 [10:09:51<19:03:24, 3.55it/s] 34%|███▍ | 127830/371472 [10:09:51<18:08:14, 3.73it/s] 34%|███▍ | 127831/371472 [10:09:52<19:45:27, 3.43it/s] 34%|███▍ | 127832/371472 [10:09:52<19:23:04, 3.49it/s] 34%|███▍ | 127833/371472 [10:09:52<19:18:29, 3.51it/s] 34%|███▍ | 127834/371472 [10:09:53<19:08:22, 3.54it/s] 34%|███▍ | 127835/371472 [10:09:53<18:58:20, 3.57it/s] 34%|███▍ | 127836/371472 [10:09:53<18:33:17, 3.65it/s] 34%|███▍ | 127837/371472 [10:09:53<18:54:47, 3.58it/s] 34%|███▍ | 127838/371472 [10:09:54<18:22:39, 3.68it/s] 34%|███▍ | 127839/371472 [10:09:54<17:53:56, 3.78it/s] 34%|███▍ | 127840/371472 [10:09:54<17:33:21, 3.85it/s] {'loss': 3.083, 'learning_rate': 6.905880324936973e-07, 'epoch': 5.51} + 34%|███▍ | 127840/371472 [10:09:54<17:33:21, 3.85it/s] 34%|███▍ | 127841/371472 [10:09:54<18:43:00, 3.62it/s] 34%|███▍ | 127842/371472 [10:09:55<19:41:54, 3.44it/s] 34%|███▍ | 127843/371472 [10:09:55<19:21:02, 3.50it/s] 34%|███▍ | 127844/371472 [10:09:55<19:42:35, 3.43it/s] 34%|███▍ | 127845/371472 [10:09:56<20:04:29, 3.37it/s] 34%|███▍ | 127846/371472 [10:09:56<19:53:15, 3.40it/s] 34%|███▍ | 127847/371472 [10:09:56<21:31:45, 3.14it/s] 34%|███▍ | 127848/371472 [10:09:57<20:22:42, 3.32it/s] 34%|███▍ | 127849/371472 [10:09:57<20:44:33, 3.26it/s] 34%|███▍ | 127850/371472 [10:09:57<20:21:17, 3.32it/s] 34%|███▍ | 127851/371472 [10:09:57<20:06:02, 3.37it/s] 34%|███▍ | 127852/371472 [10:09:58<19:18:19, 3.51it/s] 34%|███▍ | 127853/371472 [10:09:58<19:59:55, 3.38it/s] 34%|███▍ | 127854/371472 [10:09:58<20:32:30, 3.29it/s] 34%|███▍ | 127855/371472 [10:09:59<19:55:49, 3.40it/s] 34%|███▍ | 127856/371472 [10:09:59<20:12:51, 3.35it/s] 34%|███▍ | 127857/371472 [10:09:59<19:33:57, 3.46it/s] 34%|███▍ | 127858/371472 [10:09:59<19:21:49, 3.49it/s] 34%|███▍ | 127859/371472 [10:10:00<19:18:33, 3.50it/s] 34%|███▍ | 127860/371472 [10:10:00<19:26:33, 3.48it/s] {'loss': 3.2826, 'learning_rate': 6.905395505182185e-07, 'epoch': 5.51} + 34%|███▍ | 127860/371472 [10:10:00<19:26:33, 3.48it/s] 34%|███▍ | 127861/371472 [10:10:00<19:29:29, 3.47it/s] 34%|███▍ | 127862/371472 [10:10:01<18:58:03, 3.57it/s] 34%|███▍ | 127863/371472 [10:10:01<18:53:10, 3.58it/s] 34%|███▍ | 127864/371472 [10:10:01<18:13:45, 3.71it/s] 34%|███▍ | 127865/371472 [10:10:01<18:04:12, 3.74it/s] 34%|███▍ | 127866/371472 [10:10:02<17:33:09, 3.86it/s] 34%|███▍ | 127867/371472 [10:10:02<17:38:01, 3.84it/s] 34%|███▍ | 127868/371472 [10:10:02<18:10:23, 3.72it/s] 34%|███▍ | 127869/371472 [10:10:03<19:17:26, 3.51it/s] 34%|███▍ | 127870/371472 [10:10:03<20:47:09, 3.26it/s] 34%|███▍ | 127871/371472 [10:10:03<19:51:26, 3.41it/s] 34%|███▍ | 127872/371472 [10:10:03<18:56:20, 3.57it/s] 34%|███▍ | 127873/371472 [10:10:04<18:16:44, 3.70it/s] 34%|███▍ | 127874/371472 [10:10:04<17:50:02, 3.79it/s] 34%|███▍ | 127875/371472 [10:10:04<18:56:54, 3.57it/s] 34%|███▍ | 127876/371472 [10:10:04<18:49:06, 3.60it/s] 34%|███▍ | 127877/371472 [10:10:05<18:09:58, 3.72it/s] 34%|███▍ | 127878/371472 [10:10:05<18:24:34, 3.68it/s] 34%|███▍ | 127879/371472 [10:10:05<18:47:14, 3.60it/s] 34%|███▍ | 127880/371472 [10:10:06<18:26:18, 3.67it/s] {'loss': 3.3222, 'learning_rate': 6.904910685427395e-07, 'epoch': 5.51} + 34%|███▍ | 127880/371472 [10:10:06<18:26:18, 3.67it/s] 34%|███▍ | 127881/371472 [10:10:06<18:16:03, 3.70it/s] 34%|███▍ | 127882/371472 [10:10:06<18:51:20, 3.59it/s] 34%|███▍ | 127883/371472 [10:10:06<18:11:35, 3.72it/s] 34%|███▍ | 127884/371472 [10:10:07<17:59:13, 3.76it/s] 34%|███▍ | 127885/371472 [10:10:07<18:33:32, 3.65it/s] 34%|███▍ | 127886/371472 [10:10:07<19:00:46, 3.56it/s] 34%|███▍ | 127887/371472 [10:10:07<18:34:26, 3.64it/s] 34%|███▍ | 127888/371472 [10:10:08<18:39:11, 3.63it/s] 34%|███▍ | 127889/371472 [10:10:08<18:39:53, 3.63it/s] 34%|███▍ | 127890/371472 [10:10:08<19:31:07, 3.47it/s] 34%|███▍ | 127891/371472 [10:10:09<18:56:08, 3.57it/s] 34%|███▍ | 127892/371472 [10:10:09<19:24:51, 3.49it/s] 34%|███▍ | 127893/371472 [10:10:09<19:45:54, 3.42it/s] 34%|███▍ | 127894/371472 [10:10:09<19:18:28, 3.50it/s] 34%|███▍ | 127895/371472 [10:10:10<18:45:45, 3.61it/s] 34%|███▍ | 127896/371472 [10:10:10<18:36:03, 3.64it/s] 34%|███▍ | 127897/371472 [10:10:10<19:22:35, 3.49it/s] 34%|███▍ | 127898/371472 [10:10:11<18:40:19, 3.62it/s] 34%|███▍ | 127899/371472 [10:10:11<18:56:24, 3.57it/s] 34%|███▍ | 127900/371472 [10:10:11<18:27:42, 3.66it/s] {'loss': 3.33, 'learning_rate': 6.904425865672606e-07, 'epoch': 5.51} + 34%|███▍ | 127900/371472 [10:10:11<18:27:42, 3.66it/s] 34%|███▍ | 127901/371472 [10:10:11<19:34:24, 3.46it/s] 34%|███▍ | 127902/371472 [10:10:12<18:51:19, 3.59it/s] 34%|███▍ | 127903/371472 [10:10:12<20:25:37, 3.31it/s] 34%|███▍ | 127904/371472 [10:10:12<19:30:45, 3.47it/s] 34%|███▍ | 127905/371472 [10:10:13<18:49:35, 3.59it/s] 34%|███▍ | 127906/371472 [10:10:13<19:12:48, 3.52it/s] 34%|███▍ | 127907/371472 [10:10:13<19:00:45, 3.56it/s] 34%|███▍ | 127908/371472 [10:10:13<19:13:05, 3.52it/s] 34%|███▍ | 127909/371472 [10:10:14<18:18:31, 3.70it/s] 34%|███▍ | 127910/371472 [10:10:14<18:13:52, 3.71it/s] 34%|███▍ | 127911/371472 [10:10:14<18:15:25, 3.71it/s] 34%|███▍ | 127912/371472 [10:10:15<19:28:54, 3.47it/s] 34%|███▍ | 127913/371472 [10:10:15<18:56:20, 3.57it/s] 34%|███▍ | 127914/371472 [10:10:15<19:21:45, 3.49it/s] 34%|███▍ | 127915/371472 [10:10:15<19:21:13, 3.50it/s] 34%|███▍ | 127916/371472 [10:10:16<19:51:01, 3.41it/s] 34%|███▍ | 127917/371472 [10:10:16<18:58:17, 3.57it/s] 34%|███▍ | 127918/371472 [10:10:16<18:59:31, 3.56it/s] 34%|███▍ | 127919/371472 [10:10:16<18:40:14, 3.62it/s] 34%|███▍ | 127920/371472 [10:10:17<18:55:20, 3.58it/s] {'loss': 3.2663, 'learning_rate': 6.903941045917817e-07, 'epoch': 5.51} + 34%|███▍ | 127920/371472 [10:10:17<18:55:20, 3.58it/s] 34%|███▍ | 127921/371472 [10:10:17<18:56:44, 3.57it/s] 34%|███▍ | 127922/371472 [10:10:17<19:30:14, 3.47it/s] 34%|███▍ | 127923/371472 [10:10:18<19:04:36, 3.55it/s] 34%|███▍ | 127924/371472 [10:10:18<19:14:27, 3.52it/s] 34%|███▍ | 127925/371472 [10:10:18<18:52:04, 3.59it/s] 34%|███▍ | 127926/371472 [10:10:18<18:27:09, 3.67it/s] 34%|███▍ | 127927/371472 [10:10:19<17:53:33, 3.78it/s] 34%|███▍ | 127928/371472 [10:10:19<18:16:13, 3.70it/s] 34%|███▍ | 127929/371472 [10:10:19<18:19:12, 3.69it/s] 34%|███▍ | 127930/371472 [10:10:19<17:43:59, 3.81it/s] 34%|███▍ | 127931/371472 [10:10:20<18:19:01, 3.69it/s] 34%|███▍ | 127932/371472 [10:10:20<18:59:43, 3.56it/s] 34%|███▍ | 127933/371472 [10:10:20<19:57:04, 3.39it/s] 34%|███▍ | 127934/371472 [10:10:21<19:03:17, 3.55it/s] 34%|███▍ | 127935/371472 [10:10:21<19:15:49, 3.51it/s] 34%|███▍ | 127936/371472 [10:10:21<18:30:10, 3.66it/s] 34%|███▍ | 127937/371472 [10:10:21<18:01:03, 3.75it/s] 34%|███▍ | 127938/371472 [10:10:22<18:10:46, 3.72it/s] 34%|███▍ | 127939/371472 [10:10:22<18:00:00, 3.76it/s] 34%|███▍ | 127940/371472 [10:10:22<19:21:57, 3.49it/s] {'loss': 3.1387, 'learning_rate': 6.903456226163029e-07, 'epoch': 5.51} + 34%|███▍ | 127940/371472 [10:10:22<19:21:57, 3.49it/s] 34%|███▍ | 127941/371472 [10:10:23<19:08:32, 3.53it/s] 34%|███▍ | 127942/371472 [10:10:23<19:36:36, 3.45it/s] 34%|███▍ | 127943/371472 [10:10:23<19:07:25, 3.54it/s] 34%|███▍ | 127944/371472 [10:10:23<19:05:36, 3.54it/s] 34%|███▍ | 127945/371472 [10:10:24<18:28:52, 3.66it/s] 34%|███▍ | 127946/371472 [10:10:24<18:34:35, 3.64it/s] 34%|███▍ | 127947/371472 [10:10:24<17:58:23, 3.76it/s] 34%|███▍ | 127948/371472 [10:10:25<18:17:44, 3.70it/s] 34%|███▍ | 127949/371472 [10:10:25<18:37:05, 3.63it/s] 34%|███▍ | 127950/371472 [10:10:25<19:31:24, 3.46it/s] 34%|███▍ | 127951/371472 [10:10:25<19:26:26, 3.48it/s] 34%|███▍ | 127952/371472 [10:10:26<18:38:54, 3.63it/s] 34%|███▍ | 127953/371472 [10:10:26<19:32:15, 3.46it/s] 34%|███▍ | 127954/371472 [10:10:26<19:17:54, 3.51it/s] 34%|███▍ | 127955/371472 [10:10:27<18:56:32, 3.57it/s] 34%|███▍ | 127956/371472 [10:10:27<18:23:29, 3.68it/s] 34%|███▍ | 127957/371472 [10:10:27<18:05:54, 3.74it/s] 34%|███▍ | 127958/371472 [10:10:27<17:40:20, 3.83it/s] 34%|███▍ | 127959/371472 [10:10:28<17:38:59, 3.83it/s] 34%|███▍ | 127960/371472 [10:10:28<17:06:47, 3.95it/s] {'loss': 3.1768, 'learning_rate': 6.90297140640824e-07, 'epoch': 5.51} + 34%|███▍ | 127960/371472 [10:10:28<17:06:47, 3.95it/s] 34%|███▍ | 127961/371472 [10:10:28<17:44:44, 3.81it/s] 34%|███▍ | 127962/371472 [10:10:28<17:56:14, 3.77it/s] 34%|███▍ | 127963/371472 [10:10:29<17:53:11, 3.78it/s] 34%|███▍ | 127964/371472 [10:10:29<17:44:26, 3.81it/s] 34%|███▍ | 127965/371472 [10:10:29<17:50:34, 3.79it/s] 34%|███▍ | 127966/371472 [10:10:29<18:42:31, 3.62it/s] 34%|███▍ | 127967/371472 [10:10:30<18:10:23, 3.72it/s] 34%|███▍ | 127968/371472 [10:10:30<18:12:10, 3.72it/s] 34%|███▍ | 127969/371472 [10:10:30<20:20:13, 3.33it/s] 34%|███▍ | 127970/371472 [10:10:31<19:53:03, 3.40it/s] 34%|███▍ | 127971/371472 [10:10:31<18:56:33, 3.57it/s] 34%|███▍ | 127972/371472 [10:10:31<18:26:21, 3.67it/s] 34%|███▍ | 127973/371472 [10:10:31<18:33:37, 3.64it/s] 34%|███▍ | 127974/371472 [10:10:32<18:09:58, 3.72it/s] 34%|███▍ | 127975/371472 [10:10:32<18:21:11, 3.69it/s] 34%|███▍ | 127976/371472 [10:10:32<18:10:47, 3.72it/s] 34%|███▍ | 127977/371472 [10:10:33<20:12:03, 3.35it/s] 34%|███▍ | 127978/371472 [10:10:33<19:06:43, 3.54it/s] 34%|███▍ | 127979/371472 [10:10:33<20:28:00, 3.30it/s] 34%|███▍ | 127980/371472 [10:10:33<19:51:15, 3.41it/s] {'loss': 3.268, 'learning_rate': 6.902486586653451e-07, 'epoch': 5.51} + 34%|███▍ | 127980/371472 [10:10:33<19:51:15, 3.41it/s] 34%|███▍ | 127981/371472 [10:10:34<19:34:34, 3.46it/s] 34%|███▍ | 127982/371472 [10:10:34<19:19:00, 3.50it/s] 34%|███▍ | 127983/371472 [10:10:34<18:15:29, 3.70it/s] 34%|███▍ | 127984/371472 [10:10:34<18:27:11, 3.67it/s] 34%|███▍ | 127985/371472 [10:10:35<18:07:28, 3.73it/s] 34%|███▍ | 127986/371472 [10:10:35<17:42:50, 3.82it/s] 34%|███▍ | 127987/371472 [10:10:35<17:35:06, 3.85it/s] 34%|███▍ | 127988/371472 [10:10:35<17:17:40, 3.91it/s] 34%|███▍ | 127989/371472 [10:10:36<17:42:56, 3.82it/s] 34%|███▍ | 127990/371472 [10:10:36<18:33:51, 3.64it/s] 34%|███▍ | 127991/371472 [10:10:36<17:53:11, 3.78it/s] 34%|███▍ | 127992/371472 [10:10:37<18:26:08, 3.67it/s] 34%|███▍ | 127993/371472 [10:10:37<18:38:37, 3.63it/s] 34%|███▍ | 127994/371472 [10:10:37<18:11:08, 3.72it/s] 34%|███▍ | 127995/371472 [10:10:37<18:20:14, 3.69it/s] 34%|███▍ | 127996/371472 [10:10:38<18:48:36, 3.60it/s] 34%|███▍ | 127997/371472 [10:10:38<19:06:40, 3.54it/s] 34%|███▍ | 127998/371472 [10:10:38<19:26:54, 3.48it/s] 34%|███▍ | 127999/371472 [10:10:39<20:16:17, 3.34it/s] 34%|███▍ | 128000/371472 [10:10:39<19:42:39, 3.43it/s] {'loss': 3.1133, 'learning_rate': 6.902001766898662e-07, 'epoch': 5.51} + 34%|███▍ | 128000/371472 [10:10:39<19:42:39, 3.43it/s] 34%|███▍ | 128001/371472 [10:10:39<19:58:50, 3.38it/s] 34%|███▍ | 128002/371472 [10:10:40<21:24:21, 3.16it/s] 34%|███▍ | 128003/371472 [10:10:40<21:40:08, 3.12it/s] 34%|███▍ | 128004/371472 [10:10:40<20:06:11, 3.36it/s] 34%|███▍ | 128005/371472 [10:10:40<18:57:35, 3.57it/s] 34%|███▍ | 128006/371472 [10:10:41<20:00:38, 3.38it/s] 34%|███▍ | 128007/371472 [10:10:41<18:51:18, 3.59it/s] 34%|███▍ | 128008/371472 [10:10:41<19:35:35, 3.45it/s] 34%|███▍ | 128009/371472 [10:10:42<19:02:05, 3.55it/s] 34%|███▍ | 128010/371472 [10:10:42<18:38:30, 3.63it/s] 34%|███▍ | 128011/371472 [10:10:42<18:24:08, 3.67it/s] 34%|███▍ | 128012/371472 [10:10:42<17:57:52, 3.76it/s] 34%|███▍ | 128013/371472 [10:10:43<18:25:14, 3.67it/s] 34%|███▍ | 128014/371472 [10:10:43<18:09:26, 3.72it/s] 34%|███▍ | 128015/371472 [10:10:43<17:58:44, 3.76it/s] 34%|███▍ | 128016/371472 [10:10:43<17:37:12, 3.84it/s] 34%|███▍ | 128017/371472 [10:10:44<17:05:46, 3.96it/s] 34%|███▍ | 128018/371472 [10:10:44<17:40:46, 3.83it/s] 34%|███▍ | 128019/371472 [10:10:44<18:32:27, 3.65it/s] 34%|███▍ | 128020/371472 [10:10:44<19:17:30, 3.51it/s] {'loss': 3.0589, 'learning_rate': 6.901516947143872e-07, 'epoch': 5.51} + 34%|███▍ | 128020/371472 [10:10:44<19:17:30, 3.51it/s] 34%|███▍ | 128021/371472 [10:10:45<18:25:56, 3.67it/s] 34%|███▍ | 128022/371472 [10:10:45<17:47:41, 3.80it/s] 34%|███▍ | 128023/371472 [10:10:45<18:00:52, 3.75it/s] 34%|███▍ | 128024/371472 [10:10:46<19:26:41, 3.48it/s] 34%|███▍ | 128025/371472 [10:10:46<19:32:01, 3.46it/s] 34%|███▍ | 128026/371472 [10:10:46<18:43:45, 3.61it/s] 34%|███▍ | 128027/371472 [10:10:46<19:07:03, 3.54it/s] 34%|███▍ | 128028/371472 [10:10:47<18:42:57, 3.61it/s] 34%|███▍ | 128029/371472 [10:10:47<19:20:50, 3.50it/s] 34%|███▍ | 128030/371472 [10:10:47<18:21:18, 3.68it/s] 34%|███▍ | 128031/371472 [10:10:48<18:38:47, 3.63it/s] 34%|███▍ | 128032/371472 [10:10:48<20:55:00, 3.23it/s] 34%|███▍ | 128033/371472 [10:10:48<19:36:30, 3.45it/s] 34%|███▍ | 128034/371472 [10:10:48<19:28:19, 3.47it/s] 34%|███▍ | 128035/371472 [10:10:49<18:41:08, 3.62it/s] 34%|███▍ | 128036/371472 [10:10:49<18:22:04, 3.68it/s] 34%|███▍ | 128037/371472 [10:10:49<17:57:27, 3.77it/s] 34%|███▍ | 128038/371472 [10:10:50<20:08:46, 3.36it/s] 34%|███▍ | 128039/371472 [10:10:50<20:46:42, 3.25it/s] 34%|███▍ | 128040/371472 [10:10:50<20:15:56, 3.34it/s] {'loss': 3.2662, 'learning_rate': 6.901032127389084e-07, 'epoch': 5.51} + 34%|███▍ | 128040/371472 [10:10:50<20:15:56, 3.34it/s] 34%|███▍ | 128041/371472 [10:10:51<21:11:59, 3.19it/s] 34%|███▍ | 128042/371472 [10:10:51<20:52:28, 3.24it/s] 34%|███▍ | 128043/371472 [10:10:51<20:09:22, 3.35it/s] 34%|███▍ | 128044/371472 [10:10:51<21:26:03, 3.15it/s] 34%|███▍ | 128045/371472 [10:10:52<20:24:00, 3.31it/s] 34%|███▍ | 128046/371472 [10:10:52<19:37:16, 3.45it/s] 34%|███▍ | 128047/371472 [10:10:52<18:51:00, 3.59it/s] 34%|███▍ | 128048/371472 [10:10:53<18:52:36, 3.58it/s] 34%|███▍ | 128049/371472 [10:10:53<19:00:02, 3.56it/s] 34%|███▍ | 128050/371472 [10:10:53<21:03:44, 3.21it/s] 34%|███▍ | 128051/371472 [10:10:53<20:02:08, 3.37it/s] 34%|███▍ | 128052/371472 [10:10:54<19:41:41, 3.43it/s] 34%|███▍ | 128053/371472 [10:10:54<18:53:34, 3.58it/s] 34%|███▍ | 128054/371472 [10:10:54<18:57:44, 3.57it/s] 34%|███▍ | 128055/371472 [10:10:55<19:00:07, 3.56it/s] 34%|███▍ | 128056/371472 [10:10:55<18:43:35, 3.61it/s] 34%|███▍ | 128057/371472 [10:10:55<18:42:28, 3.61it/s] 34%|███▍ | 128058/371472 [10:10:55<19:13:26, 3.52it/s] 34%|███▍ | 128059/371472 [10:10:56<18:51:13, 3.59it/s] 34%|███▍ | 128060/371472 [10:10:56<18:20:12, 3.69it/s] {'loss': 3.1112, 'learning_rate': 6.900547307634295e-07, 'epoch': 5.52} + 34%|███▍ | 128060/371472 [10:10:56<18:20:12, 3.69it/s] 34%|███▍ | 128061/371472 [10:10:56<18:52:36, 3.58it/s] 34%|███▍ | 128062/371472 [10:10:56<18:14:37, 3.71it/s] 34%|███▍ | 128063/371472 [10:10:57<18:06:11, 3.73it/s] 34%|███▍ | 128064/371472 [10:10:57<17:57:28, 3.77it/s] 34%|███▍ | 128065/371472 [10:10:57<18:11:02, 3.72it/s] 34%|███▍ | 128066/371472 [10:10:58<18:50:53, 3.59it/s] 34%|███▍ | 128067/371472 [10:10:58<20:15:44, 3.34it/s] 34%|███▍ | 128068/371472 [10:10:58<21:31:10, 3.14it/s] 34%|███▍ | 128069/371472 [10:10:59<21:25:20, 3.16it/s] 34%|███▍ | 128070/371472 [10:10:59<20:03:22, 3.37it/s] 34%|███▍ | 128071/371472 [10:10:59<19:37:59, 3.44it/s] 34%|███▍ | 128072/371472 [10:10:59<19:00:11, 3.56it/s] 34%|███▍ | 128073/371472 [10:11:00<18:27:20, 3.66it/s] 34%|███▍ | 128074/371472 [10:11:00<18:31:52, 3.65it/s] 34%|███▍ | 128075/371472 [10:11:00<18:08:18, 3.73it/s] 34%|███▍ | 128076/371472 [10:11:01<20:35:41, 3.28it/s] 34%|███▍ | 128077/371472 [10:11:01<20:01:07, 3.38it/s] 34%|███▍ | 128078/371472 [10:11:01<19:19:11, 3.50it/s] 34%|███▍ | 128079/371472 [10:11:01<19:19:29, 3.50it/s] 34%|███▍ | 128080/371472 [10:11:02<18:29:50, 3.66it/s] {'loss': 3.1509, 'learning_rate': 6.900062487879506e-07, 'epoch': 5.52} + 34%|███▍ | 128080/371472 [10:11:02<18:29:50, 3.66it/s] 34%|███▍ | 128081/371472 [10:11:02<18:26:33, 3.67it/s] 34%|███▍ | 128082/371472 [10:11:02<18:17:37, 3.70it/s] 34%|███▍ | 128083/371472 [10:11:02<18:04:51, 3.74it/s] 34%|███▍ | 128084/371472 [10:11:03<17:48:15, 3.80it/s] 34%|███▍ | 128085/371472 [10:11:03<18:22:02, 3.68it/s] 34%|███▍ | 128086/371472 [10:11:03<18:03:57, 3.74it/s] 34%|███▍ | 128087/371472 [10:11:03<18:15:18, 3.70it/s] 34%|███▍ | 128088/371472 [10:11:04<18:10:17, 3.72it/s] 34%|███▍ | 128089/371472 [10:11:04<18:24:52, 3.67it/s] 34%|███▍ | 128090/371472 [10:11:04<18:33:19, 3.64it/s] 34%|███▍ | 128091/371472 [10:11:05<19:01:44, 3.55it/s] 34%|███▍ | 128092/371472 [10:11:05<19:04:18, 3.54it/s] 34%|███▍ | 128093/371472 [10:11:05<18:43:06, 3.61it/s] 34%|███▍ | 128094/371472 [10:11:05<19:41:12, 3.43it/s] 34%|███▍ | 128095/371472 [10:11:06<19:53:46, 3.40it/s] 34%|███▍ | 128096/371472 [10:11:06<18:42:53, 3.61it/s] 34%|███▍ | 128097/371472 [10:11:07<24:15:53, 2.79it/s] 34%|███▍ | 128098/371472 [10:11:07<22:43:30, 2.97it/s] 34%|███▍ | 128099/371472 [10:11:07<21:59:19, 3.07it/s] 34%|███▍ | 128100/371472 [10:11:07<21:07:30, 3.20it/s] {'loss': 3.2454, 'learning_rate': 6.899577668124717e-07, 'epoch': 5.52} + 34%|███▍ | 128100/371472 [10:11:07<21:07:30, 3.20it/s] 34%|███▍ | 128101/371472 [10:11:08<20:14:48, 3.34it/s] 34%|███▍ | 128102/371472 [10:11:08<19:35:54, 3.45it/s] 34%|███▍ | 128103/371472 [10:11:08<19:00:43, 3.56it/s] 34%|███▍ | 128104/371472 [10:11:08<18:29:48, 3.65it/s] 34%|███▍ | 128105/371472 [10:11:09<18:24:19, 3.67it/s] 34%|███▍ | 128106/371472 [10:11:09<18:44:36, 3.61it/s] 34%|███▍ | 128107/371472 [10:11:09<18:45:48, 3.60it/s] 34%|███▍ | 128108/371472 [10:11:10<19:09:28, 3.53it/s] 34%|███▍ | 128109/371472 [10:11:10<20:03:56, 3.37it/s] 34%|███▍ | 128110/371472 [10:11:10<20:12:53, 3.34it/s] 34%|███▍ | 128111/371472 [10:11:11<20:28:18, 3.30it/s] 34%|███▍ | 128112/371472 [10:11:11<20:45:29, 3.26it/s] 34%|███▍ | 128113/371472 [10:11:11<20:18:37, 3.33it/s] 34%|███▍ | 128114/371472 [10:11:11<20:09:27, 3.35it/s] 34%|███▍ | 128115/371472 [10:11:12<19:24:07, 3.48it/s] 34%|███▍ | 128116/371472 [10:11:12<19:08:26, 3.53it/s] 34%|███▍ | 128117/371472 [10:11:12<19:29:48, 3.47it/s] 34%|███▍ | 128118/371472 [10:11:13<20:45:26, 3.26it/s] 34%|███▍ | 128119/371472 [10:11:13<19:56:06, 3.39it/s] 34%|███▍ | 128120/371472 [10:11:13<20:04:49, 3.37it/s] {'loss': 3.3012, 'learning_rate': 6.899092848369929e-07, 'epoch': 5.52} + 34%|███▍ | 128120/371472 [10:11:13<20:04:49, 3.37it/s] 34%|███▍ | 128121/371472 [10:11:14<20:31:22, 3.29it/s] 34%|███▍ | 128122/371472 [10:11:14<20:00:05, 3.38it/s] 34%|███▍ | 128123/371472 [10:11:14<18:42:00, 3.61it/s] 34%|███▍ | 128124/371472 [10:11:14<18:17:25, 3.70it/s] 34%|███▍ | 128125/371472 [10:11:15<18:20:48, 3.68it/s] 34%|███▍ | 128126/371472 [10:11:15<18:26:55, 3.66it/s] 34%|███▍ | 128127/371472 [10:11:15<17:58:55, 3.76it/s] 34%|███▍ | 128128/371472 [10:11:15<18:09:00, 3.72it/s] 34%|███▍ | 128129/371472 [10:11:16<19:24:22, 3.48it/s] 34%|███▍ | 128130/371472 [10:11:16<18:53:43, 3.58it/s] 34%|███▍ | 128131/371472 [10:11:16<19:23:45, 3.48it/s] 34%|███▍ | 128132/371472 [10:11:17<20:47:45, 3.25it/s] 34%|███▍ | 128133/371472 [10:11:17<19:45:32, 3.42it/s] 34%|███▍ | 128134/371472 [10:11:17<18:50:25, 3.59it/s] 34%|███▍ | 128135/371472 [10:11:17<20:14:41, 3.34it/s] 34%|███▍ | 128136/371472 [10:11:18<20:26:47, 3.31it/s] 34%|███▍ | 128137/371472 [10:11:18<19:44:33, 3.42it/s] 34%|███▍ | 128138/371472 [10:11:18<19:08:03, 3.53it/s] 34%|███▍ | 128139/371472 [10:11:19<18:49:58, 3.59it/s] 34%|███▍ | 128140/371472 [10:11:19<18:45:04, 3.60it/s] {'loss': 3.4595, 'learning_rate': 6.898608028615139e-07, 'epoch': 5.52} + 34%|███▍ | 128140/371472 [10:11:19<18:45:04, 3.60it/s] 34%|███▍ | 128141/371472 [10:11:19<18:26:10, 3.67it/s] 34%|███▍ | 128142/371472 [10:11:19<18:24:03, 3.67it/s] 34%|███▍ | 128143/371472 [10:11:20<18:44:13, 3.61it/s] 34%|███▍ | 128144/371472 [10:11:20<20:09:41, 3.35it/s] 34%|███▍ | 128145/371472 [10:11:20<19:14:57, 3.51it/s] 34%|███▍ | 128146/371472 [10:11:21<18:34:43, 3.64it/s] 34%|███▍ | 128147/371472 [10:11:21<18:12:08, 3.71it/s] 34%|███▍ | 128148/371472 [10:11:21<17:40:22, 3.82it/s] 34%|███▍ | 128149/371472 [10:11:21<18:00:25, 3.75it/s] 34%|███▍ | 128150/371472 [10:11:22<17:16:06, 3.91it/s] 34%|███▍ | 128151/371472 [10:11:22<18:04:52, 3.74it/s] 34%|███▍ | 128152/371472 [10:11:22<18:03:55, 3.74it/s] 34%|███▍ | 128153/371472 [10:11:22<18:38:24, 3.63it/s] 34%|███▍ | 128154/371472 [10:11:23<18:32:58, 3.64it/s] 34%|███▍ | 128155/371472 [10:11:23<17:58:04, 3.76it/s] 34%|███▍ | 128156/371472 [10:11:23<17:42:22, 3.82it/s] 34%|███▍ | 128157/371472 [10:11:24<19:24:54, 3.48it/s] 35%|███▍ | 128158/371472 [10:11:24<19:06:41, 3.54it/s] 35%|███▍ | 128159/371472 [10:11:24<19:07:25, 3.53it/s] 35%|███▍ | 128160/371472 [10:11:24<18:33:18, 3.64it/s] {'loss': 3.2327, 'learning_rate': 6.89812320886035e-07, 'epoch': 5.52} + 35%|███▍ | 128160/371472 [10:11:24<18:33:18, 3.64it/s] 35%|███▍ | 128161/371472 [10:11:25<18:27:46, 3.66it/s] 35%|███▍ | 128162/371472 [10:11:25<18:01:41, 3.75it/s] 35%|███▍ | 128163/371472 [10:11:25<18:38:41, 3.62it/s] 35%|███▍ | 128164/371472 [10:11:25<18:10:31, 3.72it/s] 35%|███▍ | 128165/371472 [10:11:26<19:25:13, 3.48it/s] 35%|███▍ | 128166/371472 [10:11:26<19:07:25, 3.53it/s] 35%|███▍ | 128167/371472 [10:11:26<18:04:27, 3.74it/s] 35%|███▍ | 128168/371472 [10:11:26<18:03:12, 3.74it/s] 35%|███▍ | 128169/371472 [10:11:27<18:00:46, 3.75it/s] 35%|███▍ | 128170/371472 [10:11:27<19:29:50, 3.47it/s] 35%|███▍ | 128171/371472 [10:11:27<19:20:53, 3.49it/s] 35%|███▍ | 128172/371472 [10:11:28<22:57:32, 2.94it/s] 35%|███▍ | 128173/371472 [10:11:28<21:26:12, 3.15it/s] 35%|███▍ | 128174/371472 [10:11:28<21:05:48, 3.20it/s] 35%|███▍ | 128175/371472 [10:11:29<19:38:39, 3.44it/s] 35%|███▍ | 128176/371472 [10:11:29<20:09:46, 3.35it/s] 35%|███▍ | 128177/371472 [10:11:29<19:16:50, 3.51it/s] 35%|███▍ | 128178/371472 [10:11:30<19:31:03, 3.46it/s] 35%|███▍ | 128179/371472 [10:11:30<19:25:15, 3.48it/s] 35%|███▍ | 128180/371472 [10:11:30<18:58:10, 3.56it/s] {'loss': 3.0591, 'learning_rate': 6.897638389105561e-07, 'epoch': 5.52} + 35%|███▍ | 128180/371472 [10:11:30<18:58:10, 3.56it/s] 35%|███▍ | 128181/371472 [10:11:30<19:05:34, 3.54it/s] 35%|███▍ | 128182/371472 [10:11:31<19:18:12, 3.50it/s] 35%|███▍ | 128183/371472 [10:11:31<18:26:13, 3.67it/s] 35%|███▍ | 128184/371472 [10:11:31<19:50:57, 3.40it/s] 35%|███▍ | 128185/371472 [10:11:31<19:04:24, 3.54it/s] 35%|███▍ | 128186/371472 [10:11:32<18:41:25, 3.62it/s] 35%|███▍ | 128187/371472 [10:11:32<18:53:45, 3.58it/s] 35%|███▍ | 128188/371472 [10:11:32<18:26:44, 3.66it/s] 35%|███▍ | 128189/371472 [10:11:33<17:53:50, 3.78it/s] 35%|███▍ | 128190/371472 [10:11:33<17:57:48, 3.76it/s] 35%|███▍ | 128191/371472 [10:11:33<18:14:19, 3.71it/s] 35%|███▍ | 128192/371472 [10:11:33<17:43:14, 3.81it/s] 35%|███▍ | 128193/371472 [10:11:34<18:02:16, 3.75it/s] 35%|███▍ | 128194/371472 [10:11:34<18:07:33, 3.73it/s] 35%|███▍ | 128195/371472 [10:11:34<18:13:15, 3.71it/s] 35%|███▍ | 128196/371472 [10:11:34<17:40:00, 3.83it/s] 35%|███▍ | 128197/371472 [10:11:35<18:06:57, 3.73it/s] 35%|███▍ | 128198/371472 [10:11:35<17:37:49, 3.83it/s] 35%|███▍ | 128199/371472 [10:11:35<19:14:28, 3.51it/s] 35%|███▍ | 128200/371472 [10:11:36<18:35:53, 3.63it/s] {'loss': 3.5673, 'learning_rate': 6.897153569350772e-07, 'epoch': 5.52} + 35%|███▍ | 128200/371472 [10:11:36<18:35:53, 3.63it/s] 35%|███▍ | 128201/371472 [10:11:36<19:11:39, 3.52it/s] 35%|███▍ | 128202/371472 [10:11:36<19:48:24, 3.41it/s] 35%|███▍ | 128203/371472 [10:11:36<19:15:08, 3.51it/s] 35%|███▍ | 128204/371472 [10:11:37<20:16:19, 3.33it/s] 35%|███▍ | 128205/371472 [10:11:37<19:32:37, 3.46it/s] 35%|███▍ | 128206/371472 [10:11:37<18:33:28, 3.64it/s] 35%|███▍ | 128207/371472 [10:11:38<18:34:35, 3.64it/s] 35%|███▍ | 128208/371472 [10:11:38<18:57:38, 3.56it/s] 35%|███▍ | 128209/371472 [10:11:38<19:24:02, 3.48it/s] 35%|███▍ | 128210/371472 [10:11:38<19:13:10, 3.52it/s] 35%|███▍ | 128211/371472 [10:11:39<18:50:25, 3.59it/s] 35%|███▍ | 128212/371472 [10:11:39<18:13:43, 3.71it/s] 35%|███▍ | 128213/371472 [10:11:39<18:14:35, 3.70it/s] 35%|███▍ | 128214/371472 [10:11:39<17:25:10, 3.88it/s] 35%|███▍ | 128215/371472 [10:11:40<17:02:48, 3.96it/s] 35%|███▍ | 128216/371472 [10:11:40<17:24:31, 3.88it/s] 35%|███▍ | 128217/371472 [10:11:40<17:53:19, 3.78it/s] 35%|███▍ | 128218/371472 [10:11:40<17:49:10, 3.79it/s] 35%|███▍ | 128219/371472 [10:11:41<18:57:53, 3.56it/s] 35%|███▍ | 128220/371472 [10:11:41<18:37:34, 3.63it/s] {'loss': 3.253, 'learning_rate': 6.896668749595983e-07, 'epoch': 5.52} + 35%|███▍ | 128220/371472 [10:11:41<18:37:34, 3.63it/s] 35%|███▍ | 128221/371472 [10:11:41<18:18:40, 3.69it/s] 35%|███▍ | 128222/371472 [10:11:42<18:33:14, 3.64it/s] 35%|███▍ | 128223/371472 [10:11:42<18:18:38, 3.69it/s] 35%|███▍ | 128224/371472 [10:11:42<18:30:53, 3.65it/s] 35%|███▍ | 128225/371472 [10:11:42<18:26:37, 3.66it/s] 35%|███▍ | 128226/371472 [10:11:43<18:35:19, 3.63it/s] 35%|███▍ | 128227/371472 [10:11:43<18:33:43, 3.64it/s] 35%|███▍ | 128228/371472 [10:11:43<17:55:44, 3.77it/s] 35%|███▍ | 128229/371472 [10:11:43<17:15:20, 3.92it/s] 35%|███▍ | 128230/371472 [10:11:44<17:00:27, 3.97it/s] 35%|███▍ | 128231/371472 [10:11:44<18:10:09, 3.72it/s] 35%|███▍ | 128232/371472 [10:11:44<19:05:06, 3.54it/s] 35%|███▍ | 128233/371472 [10:11:45<19:16:58, 3.50it/s] 35%|███▍ | 128234/371472 [10:11:45<18:24:03, 3.67it/s] 35%|███▍ | 128235/371472 [10:11:45<18:21:49, 3.68it/s] 35%|███▍ | 128236/371472 [10:11:45<18:39:18, 3.62it/s] 35%|███▍ | 128237/371472 [10:11:46<18:45:04, 3.60it/s] 35%|███▍ | 128238/371472 [10:11:46<18:01:09, 3.75it/s] 35%|███▍ | 128239/371472 [10:11:46<18:17:26, 3.69it/s] 35%|███▍ | 128240/371472 [10:11:46<18:31:40, 3.65it/s] {'loss': 3.3035, 'learning_rate': 6.896183929841195e-07, 'epoch': 5.52} + 35%|███▍ | 128240/371472 [10:11:46<18:31:40, 3.65it/s] 35%|███▍ | 128241/371472 [10:11:47<18:07:12, 3.73it/s] 35%|███▍ | 128242/371472 [10:11:47<18:59:35, 3.56it/s] 35%|███▍ | 128243/371472 [10:11:47<19:25:06, 3.48it/s] 35%|███▍ | 128244/371472 [10:11:48<19:19:10, 3.50it/s] 35%|███▍ | 128245/371472 [10:11:48<19:25:16, 3.48it/s] 35%|███▍ | 128246/371472 [10:11:48<18:58:25, 3.56it/s] 35%|███▍ | 128247/371472 [10:11:48<19:32:55, 3.46it/s] 35%|███▍ | 128248/371472 [10:11:49<18:48:44, 3.59it/s] 35%|███▍ | 128249/371472 [10:11:49<18:09:37, 3.72it/s] 35%|███▍ | 128250/371472 [10:11:49<17:32:15, 3.85it/s] 35%|███▍ | 128251/371472 [10:11:49<17:52:16, 3.78it/s] 35%|███▍ | 128252/371472 [10:11:50<17:49:22, 3.79it/s] 35%|███▍ | 128253/371472 [10:11:50<17:40:18, 3.82it/s] 35%|███▍ | 128254/371472 [10:11:50<18:20:40, 3.68it/s] 35%|███▍ | 128255/371472 [10:11:51<19:12:15, 3.52it/s] 35%|███▍ | 128256/371472 [10:11:51<18:37:51, 3.63it/s] 35%|███▍ | 128257/371472 [10:11:51<18:17:25, 3.69it/s] 35%|███▍ | 128258/371472 [10:11:51<18:16:15, 3.70it/s] 35%|███▍ | 128259/371472 [10:11:52<18:35:26, 3.63it/s] 35%|███▍ | 128260/371472 [10:11:52<18:24:04, 3.67it/s] {'loss': 3.1991, 'learning_rate': 6.895699110086405e-07, 'epoch': 5.52} + 35%|███▍ | 128260/371472 [10:11:52<18:24:04, 3.67it/s] 35%|███▍ | 128261/371472 [10:11:52<17:53:34, 3.78it/s] 35%|███▍ | 128262/371472 [10:11:52<18:29:20, 3.65it/s] 35%|███▍ | 128263/371472 [10:11:53<18:25:51, 3.67it/s] 35%|���██▍ | 128264/371472 [10:11:53<19:00:40, 3.55it/s] 35%|███▍ | 128265/371472 [10:11:53<19:24:42, 3.48it/s] 35%|███▍ | 128266/371472 [10:11:54<18:59:01, 3.56it/s] 35%|███▍ | 128267/371472 [10:11:54<18:34:24, 3.64it/s] 35%|███▍ | 128268/371472 [10:11:54<18:55:57, 3.57it/s] 35%|███▍ | 128269/371472 [10:11:54<19:17:07, 3.50it/s] 35%|███▍ | 128270/371472 [10:11:55<18:13:24, 3.71it/s] 35%|███▍ | 128271/371472 [10:11:55<18:12:29, 3.71it/s] 35%|███▍ | 128272/371472 [10:11:55<18:18:57, 3.69it/s] 35%|███▍ | 128273/371472 [10:11:56<17:53:06, 3.78it/s] 35%|███▍ | 128274/371472 [10:11:56<17:45:48, 3.80it/s] 35%|███▍ | 128275/371472 [10:11:56<19:31:47, 3.46it/s] 35%|███▍ | 128276/371472 [10:11:56<19:23:46, 3.48it/s] 35%|███▍ | 128277/371472 [10:11:57<18:58:57, 3.56it/s] 35%|███▍ | 128278/371472 [10:11:57<18:32:51, 3.64it/s] 35%|███▍ | 128279/371472 [10:11:57<18:22:59, 3.67it/s] 35%|███▍ | 128280/371472 [10:11:58<19:04:47, 3.54it/s] {'loss': 3.2189, 'learning_rate': 6.895214290331616e-07, 'epoch': 5.53} + 35%|███▍ | 128280/371472 [10:11:58<19:04:47, 3.54it/s] 35%|███▍ | 128281/371472 [10:11:58<18:59:35, 3.56it/s] 35%|███▍ | 128282/371472 [10:11:58<19:13:51, 3.51it/s] 35%|███▍ | 128283/371472 [10:11:58<19:33:36, 3.45it/s] 35%|███▍ | 128284/371472 [10:11:59<19:04:57, 3.54it/s] 35%|███▍ | 128285/371472 [10:11:59<19:06:04, 3.54it/s] 35%|███▍ | 128286/371472 [10:11:59<19:04:51, 3.54it/s] 35%|███▍ | 128287/371472 [10:11:59<18:21:31, 3.68it/s] 35%|███▍ | 128288/371472 [10:12:00<17:35:43, 3.84it/s] 35%|███▍ | 128289/371472 [10:12:00<17:14:58, 3.92it/s] 35%|███▍ | 128290/371472 [10:12:00<17:16:28, 3.91it/s] 35%|███▍ | 128291/371472 [10:12:00<17:37:01, 3.83it/s] 35%|███▍ | 128292/371472 [10:12:01<17:05:56, 3.95it/s] 35%|███▍ | 128293/371472 [10:12:01<18:54:14, 3.57it/s] 35%|███▍ | 128294/371472 [10:12:01<18:12:07, 3.71it/s] 35%|███▍ | 128295/371472 [10:12:02<18:00:28, 3.75it/s] 35%|███▍ | 128296/371472 [10:12:02<17:48:24, 3.79it/s] 35%|███▍ | 128297/371472 [10:12:02<17:44:44, 3.81it/s] 35%|███▍ | 128298/371472 [10:12:02<17:22:12, 3.89it/s] 35%|███▍ | 128299/371472 [10:12:03<17:04:58, 3.95it/s] 35%|███▍ | 128300/371472 [10:12:03<17:11:40, 3.93it/s] {'loss': 3.3936, 'learning_rate': 6.894729470576827e-07, 'epoch': 5.53} + 35%|███▍ | 128300/371472 [10:12:03<17:11:40, 3.93it/s] 35%|███▍ | 128301/371472 [10:12:03<17:03:27, 3.96it/s] 35%|███▍ | 128302/371472 [10:12:03<17:04:27, 3.96it/s] 35%|███▍ | 128303/371472 [10:12:04<17:08:46, 3.94it/s] 35%|███▍ | 128304/371472 [10:12:04<17:07:53, 3.94it/s] 35%|███▍ | 128305/371472 [10:12:04<17:31:44, 3.85it/s] 35%|███▍ | 128306/371472 [10:12:04<17:56:59, 3.76it/s] 35%|███▍ | 128307/371472 [10:12:05<17:55:55, 3.77it/s] 35%|███▍ | 128308/371472 [10:12:05<17:59:04, 3.76it/s] 35%|███▍ | 128309/371472 [10:12:05<18:33:49, 3.64it/s] 35%|███▍ | 128310/371472 [10:12:06<20:00:45, 3.38it/s] 35%|███▍ | 128311/371472 [10:12:06<20:19:38, 3.32it/s] 35%|███▍ | 128312/371472 [10:12:06<19:58:42, 3.38it/s] 35%|███▍ | 128313/371472 [10:12:06<19:43:44, 3.42it/s] 35%|███▍ | 128314/371472 [10:12:07<19:07:12, 3.53it/s] 35%|███▍ | 128315/371472 [10:12:07<18:53:11, 3.58it/s] 35%|███▍ | 128316/371472 [10:12:07<20:14:57, 3.34it/s] 35%|███▍ | 128317/371472 [10:12:08<19:24:25, 3.48it/s] 35%|███▍ | 128318/371472 [10:12:08<19:45:24, 3.42it/s] 35%|███▍ | 128319/371472 [10:12:08<19:10:39, 3.52it/s] 35%|███▍ | 128320/371472 [10:12:08<18:48:17, 3.59it/s] {'loss': 3.0153, 'learning_rate': 6.894244650822038e-07, 'epoch': 5.53} + 35%|███▍ | 128320/371472 [10:12:08<18:48:17, 3.59it/s] 35%|███▍ | 128321/371472 [10:12:09<18:15:43, 3.70it/s] 35%|███▍ | 128322/371472 [10:12:09<17:53:06, 3.78it/s] 35%|███▍ | 128323/371472 [10:12:09<17:16:32, 3.91it/s] 35%|███▍ | 128324/371472 [10:12:09<18:09:52, 3.72it/s] 35%|███▍ | 128325/371472 [10:12:10<17:56:25, 3.76it/s] 35%|███▍ | 128326/371472 [10:12:10<18:08:02, 3.72it/s] 35%|███▍ | 128327/371472 [10:12:10<18:49:47, 3.59it/s] 35%|███▍ | 128328/371472 [10:12:11<19:01:29, 3.55it/s] 35%|███▍ | 128329/371472 [10:12:11<19:23:12, 3.48it/s] 35%|███▍ | 128330/371472 [10:12:11<19:11:35, 3.52it/s] 35%|███▍ | 128331/371472 [10:12:11<18:31:50, 3.64it/s] 35%|███▍ | 128332/371472 [10:12:12<18:04:40, 3.74it/s] 35%|███▍ | 128333/371472 [10:12:12<18:09:32, 3.72it/s] 35%|███▍ | 128334/371472 [10:12:12<17:51:25, 3.78it/s] 35%|███▍ | 128335/371472 [10:12:12<18:23:41, 3.67it/s] 35%|███▍ | 128336/371472 [10:12:13<19:01:16, 3.55it/s] 35%|███▍ | 128337/371472 [10:12:13<18:42:49, 3.61it/s] 35%|███▍ | 128338/371472 [10:12:13<19:22:02, 3.49it/s] 35%|███▍ | 128339/371472 [10:12:14<19:08:47, 3.53it/s] 35%|███▍ | 128340/371472 [10:12:14<18:35:24, 3.63it/s] {'loss': 3.1051, 'learning_rate': 6.89375983106725e-07, 'epoch': 5.53} + 35%|███▍ | 128340/371472 [10:12:14<18:35:24, 3.63it/s] 35%|███▍ | 128341/371472 [10:12:14<18:14:44, 3.70it/s] 35%|███▍ | 128342/371472 [10:12:14<17:38:38, 3.83it/s] 35%|███▍ | 128343/371472 [10:12:15<17:14:35, 3.92it/s] 35%|███▍ | 128344/371472 [10:12:15<17:26:26, 3.87it/s] 35%|███▍ | 128345/371472 [10:12:15<17:46:51, 3.80it/s] 35%|███▍ | 128346/371472 [10:12:15<18:07:54, 3.72it/s] 35%|███▍ | 128347/371472 [10:12:16<18:32:38, 3.64it/s] 35%|███▍ | 128348/371472 [10:12:16<18:22:05, 3.68it/s] 35%|███▍ | 128349/371472 [10:12:16<18:14:11, 3.70it/s] 35%|███▍ | 128350/371472 [10:12:16<17:48:33, 3.79it/s] 35%|███▍ | 128351/371472 [10:12:17<17:30:52, 3.86it/s] 35%|███▍ | 128352/371472 [10:12:17<19:22:17, 3.49it/s] 35%|███▍ | 128353/371472 [10:12:17<19:10:46, 3.52it/s] 35%|███▍ | 128354/371472 [10:12:18<19:14:03, 3.51it/s] 35%|███▍ | 128355/371472 [10:12:18<19:55:06, 3.39it/s] 35%|███▍ | 128356/371472 [10:12:18<19:37:49, 3.44it/s] 35%|███▍ | 128357/371472 [10:12:19<18:44:48, 3.60it/s] 35%|███▍ | 128358/371472 [10:12:19<18:10:26, 3.72it/s] 35%|███▍ | 128359/371472 [10:12:19<18:37:14, 3.63it/s] 35%|███▍ | 128360/371472 [10:12:19<18:39:05, 3.62it/s] {'loss': 3.0751, 'learning_rate': 6.893275011312461e-07, 'epoch': 5.53} + 35%|███▍ | 128360/371472 [10:12:19<18:39:05, 3.62it/s] 35%|███▍ | 128361/371472 [10:12:20<18:45:01, 3.60it/s] 35%|███▍ | 128362/371472 [10:12:20<19:02:04, 3.55it/s] 35%|███▍ | 128363/371472 [10:12:20<19:31:01, 3.46it/s] 35%|███▍ | 128364/371472 [10:12:20<18:46:36, 3.60it/s] 35%|███▍ | 128365/371472 [10:12:21<19:08:11, 3.53it/s] 35%|███▍ | 128366/371472 [10:12:21<19:04:08, 3.54it/s] 35%|███▍ | 128367/371472 [10:12:21<18:31:25, 3.65it/s] 35%|███▍ | 128368/371472 [10:12:22<19:20:16, 3.49it/s] 35%|███▍ | 128369/371472 [10:12:22<18:59:51, 3.55it/s] 35%|███▍ | 128370/371472 [10:12:22<20:03:04, 3.37it/s] 35%|███▍ | 128371/371472 [10:12:22<19:30:26, 3.46it/s] 35%|███▍ | 128372/371472 [10:12:23<18:45:38, 3.60it/s] 35%|███▍ | 128373/371472 [10:12:23<17:56:29, 3.76it/s] 35%|███▍ | 128374/371472 [10:12:23<17:37:35, 3.83it/s] 35%|███▍ | 128375/371472 [10:12:23<18:02:14, 3.74it/s] 35%|███▍ | 128376/371472 [10:12:24<19:21:28, 3.49it/s] 35%|███▍ | 128377/371472 [10:12:24<19:19:24, 3.49it/s] 35%|███▍ | 128378/371472 [10:12:24<18:34:14, 3.64it/s] 35%|███▍ | 128379/371472 [10:12:25<19:20:27, 3.49it/s] 35%|███▍ | 128380/371472 [10:12:25<19:28:14, 3.47it/s] {'loss': 3.3597, 'learning_rate': 6.892790191557672e-07, 'epoch': 5.53} + 35%|███▍ | 128380/371472 [10:12:25<19:28:14, 3.47it/s] 35%|███▍ | 128381/371472 [10:12:25<20:39:57, 3.27it/s] 35%|███▍ | 128382/371472 [10:12:26<19:33:34, 3.45it/s] 35%|███▍ | 128383/371472 [10:12:26<20:10:39, 3.35it/s] 35%|███▍ | 128384/371472 [10:12:26<20:11:38, 3.34it/s] 35%|███▍ | 128385/371472 [10:12:27<21:06:28, 3.20it/s] 35%|███▍ | 128386/371472 [10:12:27<20:48:36, 3.24it/s] 35%|███▍ | 128387/371472 [10:12:27<19:39:47, 3.43it/s] 35%|███▍ | 128388/371472 [10:12:27<19:13:33, 3.51it/s] 35%|███▍ | 128389/371472 [10:12:28<18:42:13, 3.61it/s] 35%|███▍ | 128390/371472 [10:12:28<17:59:45, 3.75it/s] 35%|███▍ | 128391/371472 [10:12:28<18:04:12, 3.74it/s] 35%|███▍ | 128392/371472 [10:12:28<18:37:16, 3.63it/s] 35%|███▍ | 128393/371472 [10:12:29<18:15:38, 3.70it/s] 35%|███▍ | 128394/371472 [10:12:29<18:48:06, 3.59it/s] 35%|███▍ | 128395/371472 [10:12:29<19:03:14, 3.54it/s] 35%|███▍ | 128396/371472 [10:12:30<18:32:11, 3.64it/s] 35%|███▍ | 128397/371472 [10:12:30<18:01:18, 3.75it/s] 35%|███▍ | 128398/371472 [10:12:30<17:49:13, 3.79it/s] 35%|███▍ | 128399/371472 [10:12:30<17:46:45, 3.80it/s] 35%|███▍ | 128400/371472 [10:12:31<18:14:50, 3.70it/s] {'loss': 3.2377, 'learning_rate': 6.892305371802882e-07, 'epoch': 5.53} + 35%|███▍ | 128400/371472 [10:12:31<18:14:50, 3.70it/s] 35%|███▍ | 128401/371472 [10:12:31<17:59:16, 3.75it/s] 35%|███▍ | 128402/371472 [10:12:31<18:38:28, 3.62it/s] 35%|███▍ | 128403/371472 [10:12:31<18:14:21, 3.70it/s] 35%|███▍ | 128404/371472 [10:12:32<17:41:48, 3.82it/s] 35%|███▍ | 128405/371472 [10:12:32<18:25:40, 3.66it/s] 35%|███▍ | 128406/371472 [10:12:32<18:52:03, 3.58it/s] 35%|███▍ | 128407/371472 [10:12:32<18:08:23, 3.72it/s] 35%|███▍ | 128408/371472 [10:12:33<17:52:18, 3.78it/s] 35%|███▍ | 128409/371472 [10:12:33<19:16:35, 3.50it/s] 35%|███▍ | 128410/371472 [10:12:33<18:56:16, 3.57it/s] 35%|███▍ | 128411/371472 [10:12:34<18:16:46, 3.69it/s] 35%|███▍ | 128412/371472 [10:12:34<19:40:50, 3.43it/s] 35%|███▍ | 128413/371472 [10:12:34<20:43:29, 3.26it/s] 35%|███▍ | 128414/371472 [10:12:35<19:29:23, 3.46it/s] 35%|███▍ | 128415/371472 [10:12:35<20:40:50, 3.26it/s] 35%|███▍ | 128416/371472 [10:12:35<21:38:39, 3.12it/s] 35%|███▍ | 128417/371472 [10:12:36<21:44:10, 3.11it/s] 35%|███▍ | 128418/371472 [10:12:36<21:33:51, 3.13it/s] 35%|███▍ | 128419/371472 [10:12:36<20:04:39, 3.36it/s] 35%|███▍ | 128420/371472 [10:12:36<20:02:07, 3.37it/s] {'loss': 3.1814, 'learning_rate': 6.891820552048094e-07, 'epoch': 5.53} + 35%|███▍ | 128420/371472 [10:12:36<20:02:07, 3.37it/s] 35%|███▍ | 128421/371472 [10:12:37<19:01:23, 3.55it/s] 35%|███▍ | 128422/371472 [10:12:37<18:16:01, 3.70it/s] 35%|███▍ | 128423/371472 [10:12:37<18:41:42, 3.61it/s] 35%|███▍ | 128424/371472 [10:12:37<18:16:33, 3.69it/s] 35%|███▍ | 128425/371472 [10:12:38<19:33:15, 3.45it/s] 35%|███▍ | 128426/371472 [10:12:38<18:59:19, 3.56it/s] 35%|███▍ | 128427/371472 [10:12:38<18:56:02, 3.57it/s] 35%|███▍ | 128428/371472 [10:12:39<18:58:35, 3.56it/s] 35%|███▍ | 128429/371472 [10:12:39<18:34:30, 3.63it/s] 35%|███▍ | 128430/371472 [10:12:39<20:25:42, 3.30it/s] 35%|███▍ | 128431/371472 [10:12:39<19:21:53, 3.49it/s] 35%|███▍ | 128432/371472 [10:12:40<20:26:41, 3.30it/s] 35%|███▍ | 128433/371472 [10:12:40<21:00:25, 3.21it/s] 35%|███▍ | 128434/371472 [10:12:40<20:20:33, 3.32it/s] 35%|███▍ | 128435/371472 [10:12:41<18:59:39, 3.55it/s] 35%|███▍ | 128436/371472 [10:12:41<20:58:30, 3.22it/s] 35%|███▍ | 128437/371472 [10:12:42<24:34:45, 2.75it/s] 35%|███▍ | 128438/371472 [10:12:42<22:48:28, 2.96it/s] 35%|███▍ | 128439/371472 [10:12:42<21:35:41, 3.13it/s] 35%|███▍ | 128440/371472 [10:12:42<22:14:58, 3.03it/s] {'loss': 3.0797, 'learning_rate': 6.891335732293305e-07, 'epoch': 5.53} + 35%|███▍ | 128440/371472 [10:12:42<22:14:58, 3.03it/s] 35%|███▍ | 128441/371472 [10:12:43<22:33:39, 2.99it/s] 35%|███▍ | 128442/371472 [10:12:43<22:04:53, 3.06it/s] 35%|███▍ | 128443/371472 [10:12:43<21:18:40, 3.17it/s] 35%|███▍ | 128444/371472 [10:12:44<21:01:59, 3.21it/s] 35%|███▍ | 128445/371472 [10:12:44<22:18:39, 3.03it/s] 35%|███▍ | 128446/371472 [10:12:44<21:25:35, 3.15it/s] 35%|███▍ | 128447/371472 [10:12:45<20:01:53, 3.37it/s] 35%|███▍ | 128448/371472 [10:12:45<20:15:53, 3.33it/s] 35%|███▍ | 128449/371472 [10:12:45<20:15:26, 3.33it/s] 35%|███▍ | 128450/371472 [10:12:45<19:10:43, 3.52it/s] 35%|███▍ | 128451/371472 [10:12:46<18:51:19, 3.58it/s] 35%|███▍ | 128452/371472 [10:12:46<18:06:09, 3.73it/s] 35%|███▍ | 128453/371472 [10:12:46<18:49:35, 3.59it/s] 35%|███▍ | 128454/371472 [10:12:47<22:03:09, 3.06it/s] 35%|███▍ | 128455/371472 [10:12:47<21:25:17, 3.15it/s] 35%|███▍ | 128456/371472 [10:12:47<20:42:37, 3.26it/s] 35%|███▍ | 128457/371472 [10:12:48<19:35:52, 3.44it/s] 35%|███▍ | 128458/371472 [10:12:48<19:39:23, 3.43it/s] 35%|███▍ | 128459/371472 [10:12:48<20:27:29, 3.30it/s] 35%|███▍ | 128460/371472 [10:12:48<20:53:40, 3.23it/s] {'loss': 3.1967, 'learning_rate': 6.890850912538515e-07, 'epoch': 5.53} + 35%|███▍ | 128460/371472 [10:12:48<20:53:40, 3.23it/s] 35%|███▍ | 128461/371472 [10:12:49<20:32:49, 3.29it/s] 35%|███▍ | 128462/371472 [10:12:49<20:13:38, 3.34it/s] 35%|███▍ | 128463/371472 [10:12:49<19:28:26, 3.47it/s] 35%|███▍ | 128464/371472 [10:12:50<19:30:05, 3.46it/s] 35%|███▍ | 128465/371472 [10:12:50<19:37:42, 3.44it/s] 35%|███▍ | 128466/371472 [10:12:50<19:08:01, 3.53it/s] 35%|███▍ | 128467/371472 [10:12:50<18:46:57, 3.59it/s] 35%|███▍ | 128468/371472 [10:12:51<18:26:52, 3.66it/s] 35%|███▍ | 128469/371472 [10:12:51<17:48:26, 3.79it/s] 35%|███▍ | 128470/371472 [10:12:51<19:02:09, 3.55it/s] 35%|███▍ | 128471/371472 [10:12:52<19:25:06, 3.48it/s] 35%|███▍ | 128472/371472 [10:12:52<20:52:59, 3.23it/s] 35%|███▍ | 128473/371472 [10:12:52<20:44:06, 3.26it/s] 35%|███▍ | 128474/371472 [10:12:53<21:06:22, 3.20it/s] 35%|███▍ | 128475/371472 [10:12:53<19:40:10, 3.43it/s] 35%|███▍ | 128476/371472 [10:12:53<19:09:46, 3.52it/s] 35%|███▍ | 128477/371472 [10:12:53<18:36:52, 3.63it/s] 35%|███▍ | 128478/371472 [10:12:54<18:15:30, 3.70it/s] 35%|███▍ | 128479/371472 [10:12:54<18:49:08, 3.59it/s] 35%|███▍ | 128480/371472 [10:12:54<19:23:58, 3.48it/s] {'loss': 3.2526, 'learning_rate': 6.890366092783727e-07, 'epoch': 5.53} + 35%|███▍ | 128480/371472 [10:12:54<19:23:58, 3.48it/s] 35%|███▍ | 128481/371472 [10:12:54<18:32:08, 3.64it/s] 35%|███▍ | 128482/371472 [10:12:55<18:26:42, 3.66it/s] 35%|███▍ | 128483/371472 [10:12:55<17:58:28, 3.76it/s] 35%|███▍ | 128484/371472 [10:12:55<17:33:28, 3.84it/s] 35%|███▍ | 128485/371472 [10:12:55<17:37:16, 3.83it/s] 35%|███▍ | 128486/371472 [10:12:56<17:30:29, 3.86it/s] 35%|███▍ | 128487/371472 [10:12:56<17:17:16, 3.90it/s] 35%|███▍ | 128488/371472 [10:12:56<17:37:57, 3.83it/s] 35%|███▍ | 128489/371472 [10:12:56<17:20:35, 3.89it/s] 35%|███▍ | 128490/371472 [10:12:57<17:07:11, 3.94it/s] 35%|███▍ | 128491/371472 [10:12:57<18:06:11, 3.73it/s] 35%|███▍ | 128492/371472 [10:12:57<18:54:14, 3.57it/s] 35%|███▍ | 128493/371472 [10:12:58<18:22:18, 3.67it/s] 35%|███▍ | 128494/371472 [10:12:58<18:17:45, 3.69it/s] 35%|███▍ | 128495/371472 [10:12:58<17:46:52, 3.80it/s] 35%|███▍ | 128496/371472 [10:12:58<17:40:51, 3.82it/s] 35%|███▍ | 128497/371472 [10:12:59<17:41:03, 3.82it/s] 35%|███▍ | 128498/371472 [10:12:59<17:28:02, 3.86it/s] 35%|███▍ | 128499/371472 [10:12:59<18:09:01, 3.72it/s] 35%|███▍ | 128500/371472 [10:12:59<17:38:01, 3.83it/s] {'loss': 3.139, 'learning_rate': 6.889881273028939e-07, 'epoch': 5.53} + 35%|███▍ | 128500/371472 [10:12:59<17:38:01, 3.83it/s] 35%|███▍ | 128501/371472 [10:13:00<17:36:49, 3.83it/s] 35%|███▍ | 128502/371472 [10:13:00<17:40:58, 3.82it/s] 35%|███▍ | 128503/371472 [10:13:00<17:47:53, 3.79it/s] 35%|███▍ | 128504/371472 [10:13:00<18:01:06, 3.75it/s] 35%|███▍ | 128505/371472 [10:13:01<18:19:30, 3.68it/s] 35%|███▍ | 128506/371472 [10:13:01<18:25:01, 3.66it/s] 35%|███▍ | 128507/371472 [10:13:01<18:01:53, 3.74it/s] 35%|███▍ | 128508/371472 [10:13:02<17:52:59, 3.77it/s] 35%|███▍ | 128509/371472 [10:13:02<17:56:39, 3.76it/s] 35%|███▍ | 128510/371472 [10:13:02<18:15:35, 3.70it/s] 35%|███▍ | 128511/371472 [10:13:02<18:57:47, 3.56it/s] 35%|███▍ | 128512/371472 [10:13:03<18:25:22, 3.66it/s] 35%|███▍ | 128513/371472 [10:13:03<19:05:13, 3.54it/s] 35%|███▍ | 128514/371472 [10:13:03<19:02:04, 3.55it/s] 35%|███▍ | 128515/371472 [10:13:03<18:34:58, 3.63it/s] 35%|███▍ | 128516/371472 [10:13:04<19:40:29, 3.43it/s] 35%|███▍ | 128517/371472 [10:13:04<19:18:30, 3.50it/s] 35%|███▍ | 128518/371472 [10:13:04<18:41:47, 3.61it/s] 35%|███▍ | 128519/371472 [10:13:05<18:22:07, 3.67it/s] 35%|███▍ | 128520/371472 [10:13:05<18:21:23, 3.68it/s] {'loss': 3.1816, 'learning_rate': 6.889396453274149e-07, 'epoch': 5.54} + 35%|███▍ | 128520/371472 [10:13:05<18:21:23, 3.68it/s] 35%|███▍ | 128521/371472 [10:13:05<18:27:21, 3.66it/s] 35%|███▍ | 128522/371472 [10:13:05<18:04:06, 3.74it/s] 35%|███▍ | 128523/371472 [10:13:06<17:33:05, 3.85it/s] 35%|███▍ | 128524/371472 [10:13:06<18:27:00, 3.66it/s] 35%|███▍ | 128525/371472 [10:13:06<20:10:22, 3.35it/s] 35%|███▍ | 128526/371472 [10:13:07<20:05:56, 3.36it/s] 35%|███▍ | 128527/371472 [10:13:07<19:51:18, 3.40it/s] 35%|███▍ | 128528/371472 [10:13:07<19:38:38, 3.44it/s] 35%|███▍ | 128529/371472 [10:13:07<19:57:49, 3.38it/s] 35%|███▍ | 128530/371472 [10:13:08<20:00:30, 3.37it/s] 35%|███▍ | 128531/371472 [10:13:08<19:08:50, 3.52it/s] 35%|███▍ | 128532/371472 [10:13:08<19:08:37, 3.53it/s] 35%|███▍ | 128533/371472 [10:13:09<18:33:35, 3.64it/s] 35%|███▍ | 128534/371472 [10:13:09<17:56:42, 3.76it/s] 35%|███▍ | 128535/371472 [10:13:09<18:26:33, 3.66it/s] 35%|███▍ | 128536/371472 [10:13:09<20:25:40, 3.30it/s] 35%|███▍ | 128537/371472 [10:13:10<19:10:18, 3.52it/s] 35%|███▍ | 128538/371472 [10:13:10<18:44:00, 3.60it/s] 35%|███▍ | 128539/371472 [10:13:10<19:21:08, 3.49it/s] 35%|███▍ | 128540/371472 [10:13:11<20:44:33, 3.25it/s] {'loss': 3.2645, 'learning_rate': 6.88891163351936e-07, 'epoch': 5.54} + 35%|███▍ | 128540/371472 [10:13:11<20:44:33, 3.25it/s] 35%|███▍ | 128541/371472 [10:13:11<21:44:38, 3.10it/s] 35%|███▍ | 128542/371472 [10:13:11<21:33:32, 3.13it/s] 35%|███▍ | 128543/371472 [10:13:12<20:34:05, 3.28it/s] 35%|███▍ | 128544/371472 [10:13:12<20:05:11, 3.36it/s] 35%|███▍ | 128545/371472 [10:13:12<19:32:52, 3.45it/s] 35%|███▍ | 128546/371472 [10:13:12<20:24:32, 3.31it/s] 35%|███▍ | 128547/371472 [10:13:13<19:21:02, 3.49it/s] 35%|███▍ | 128548/371472 [10:13:13<19:53:26, 3.39it/s] 35%|███▍ | 128549/371472 [10:13:13<18:49:20, 3.59it/s] 35%|███▍ | 128550/371472 [10:13:14<18:29:48, 3.65it/s] 35%|███▍ | 128551/371472 [10:13:14<19:52:10, 3.40it/s] 35%|███▍ | 128552/371472 [10:13:14<19:51:45, 3.40it/s] 35%|███▍ | 128553/371472 [10:13:14<19:40:52, 3.43it/s] 35%|███▍ | 128554/371472 [10:13:15<19:26:00, 3.47it/s] 35%|███▍ | 128555/371472 [10:13:15<19:40:29, 3.43it/s] 35%|███▍ | 128556/371472 [10:13:15<19:16:20, 3.50it/s] 35%|███▍ | 128557/371472 [10:13:16<19:07:47, 3.53it/s] 35%|███▍ | 128558/371472 [10:13:16<19:56:54, 3.38it/s] 35%|███▍ | 128559/371472 [10:13:16<19:58:47, 3.38it/s] 35%|███▍ | 128560/371472 [10:13:16<19:22:56, 3.48it/s] {'loss': 3.0456, 'learning_rate': 6.888426813764572e-07, 'epoch': 5.54} + 35%|███▍ | 128560/371472 [10:13:16<19:22:56, 3.48it/s] 35%|███▍ | 128561/371472 [10:13:17<20:00:30, 3.37it/s] 35%|███▍ | 128562/371472 [10:13:17<20:04:14, 3.36it/s] 35%|███▍ | 128563/371472 [10:13:17<20:50:02, 3.24it/s] 35%|███▍ | 128564/371472 [10:13:18<20:35:39, 3.28it/s] 35%|███▍ | 128565/371472 [10:13:18<19:40:19, 3.43it/s] 35%|███▍ | 128566/371472 [10:13:18<20:17:13, 3.33it/s] 35%|███▍ | 128567/371472 [10:13:19<19:18:24, 3.49it/s] 35%|███▍ | 128568/371472 [10:13:19<18:22:38, 3.67it/s] 35%|███▍ | 128569/371472 [10:13:19<18:05:34, 3.73it/s] 35%|███▍ | 128570/371472 [10:13:19<17:40:52, 3.82it/s] 35%|███▍ | 128571/371472 [10:13:20<17:32:19, 3.85it/s] 35%|███▍ | 128572/371472 [10:13:20<19:06:44, 3.53it/s] 35%|███▍ | 128573/371472 [10:13:20<18:08:31, 3.72it/s] 35%|███▍ | 128574/371472 [10:13:20<18:00:10, 3.75it/s] 35%|███▍ | 128575/371472 [10:13:21<19:14:40, 3.51it/s] 35%|███▍ | 128576/371472 [10:13:21<18:35:57, 3.63it/s] 35%|███▍ | 128577/371472 [10:13:21<18:52:18, 3.58it/s] 35%|███▍ | 128578/371472 [10:13:22<19:08:08, 3.53it/s] 35%|███▍ | 128579/371472 [10:13:22<18:38:51, 3.62it/s] 35%|███▍ | 128580/371472 [10:13:22<20:18:14, 3.32it/s] {'loss': 3.12, 'learning_rate': 6.887941994009783e-07, 'epoch': 5.54} + 35%|███▍ | 128580/371472 [10:13:22<20:18:14, 3.32it/s] 35%|███▍ | 128581/371472 [10:13:22<20:05:03, 3.36it/s] 35%|███▍ | 128582/371472 [10:13:23<20:15:25, 3.33it/s] 35%|███▍ | 128583/371472 [10:13:23<19:53:54, 3.39it/s] 35%|███▍ | 128584/371472 [10:13:23<19:47:32, 3.41it/s] 35%|███▍ | 128585/371472 [10:13:24<19:15:49, 3.50it/s] 35%|███▍ | 128586/371472 [10:13:24<18:56:17, 3.56it/s] 35%|███▍ | 128587/371472 [10:13:24<17:59:23, 3.75it/s] 35%|███▍ | 128588/371472 [10:13:24<18:12:12, 3.71it/s] 35%|███▍ | 128589/371472 [10:13:25<17:50:11, 3.78it/s] 35%|███▍ | 128590/371472 [10:13:25<18:18:43, 3.68it/s] 35%|███▍ | 128591/371472 [10:13:25<19:37:59, 3.44it/s] 35%|███▍ | 128592/371472 [10:13:26<19:18:25, 3.49it/s] 35%|███▍ | 128593/371472 [10:13:26<19:02:42, 3.54it/s] 35%|███▍ | 128594/371472 [10:13:26<18:28:39, 3.65it/s] 35%|███▍ | 128595/371472 [10:13:26<19:39:18, 3.43it/s] 35%|███▍ | 128596/371472 [10:13:27<20:45:20, 3.25it/s] 35%|███▍ | 128597/371472 [10:13:27<20:28:39, 3.29it/s] 35%|███▍ | 128598/371472 [10:13:27<20:49:36, 3.24it/s] 35%|███▍ | 128599/371472 [10:13:28<21:51:48, 3.09it/s] 35%|███▍ | 128600/371472 [10:13:28<21:44:31, 3.10it/s] {'loss': 3.0965, 'learning_rate': 6.887457174254994e-07, 'epoch': 5.54} + 35%|███▍ | 128600/371472 [10:13:28<21:44:31, 3.10it/s] 35%|███▍ | 128601/371472 [10:13:28<21:30:29, 3.14it/s] 35%|███▍ | 128602/371472 [10:13:29<20:35:14, 3.28it/s] 35%|███▍ | 128603/371472 [10:13:29<20:03:46, 3.36it/s] 35%|███▍ | 128604/371472 [10:13:29<19:40:16, 3.43it/s] 35%|███▍ | 128605/371472 [10:13:29<19:53:29, 3.39it/s] 35%|███▍ | 128606/371472 [10:13:30<20:23:57, 3.31it/s] 35%|███▍ | 128607/371472 [10:13:30<20:14:44, 3.33it/s] 35%|███▍ | 128608/371472 [10:13:30<21:07:26, 3.19it/s] 35%|███▍ | 128609/371472 [10:13:31<19:24:47, 3.48it/s] 35%|███▍ | 128610/371472 [10:13:31<18:25:05, 3.66it/s] 35%|███▍ | 128611/371472 [10:13:31<17:51:23, 3.78it/s] 35%|███▍ | 128612/371472 [10:13:32<21:23:43, 3.15it/s] 35%|███▍ | 128613/371472 [10:13:32<21:24:22, 3.15it/s] 35%|███▍ | 128614/371472 [10:13:32<20:50:31, 3.24it/s] 35%|███▍ | 128615/371472 [10:13:32<19:56:52, 3.38it/s] 35%|███▍ | 128616/371472 [10:13:33<19:26:17, 3.47it/s] 35%|███▍ | 128617/371472 [10:13:33<21:01:46, 3.21it/s] 35%|███▍ | 128618/371472 [10:13:33<20:40:35, 3.26it/s] 35%|███▍ | 128619/371472 [10:13:34<19:15:20, 3.50it/s] 35%|███▍ | 128620/371472 [10:13:34<18:22:27, 3.67it/s] {'loss': 3.0685, 'learning_rate': 6.886972354500204e-07, 'epoch': 5.54} + 35%|███▍ | 128620/371472 [10:13:34<18:22:27, 3.67it/s] 35%|███▍ | 128621/371472 [10:13:34<18:23:24, 3.67it/s] 35%|███▍ | 128622/371472 [10:13:34<18:26:24, 3.66it/s] 35%|███▍ | 128623/371472 [10:13:35<19:20:06, 3.49it/s] 35%|███▍ | 128624/371472 [10:13:35<18:22:17, 3.67it/s] 35%|███▍ | 128625/371472 [10:13:35<18:46:22, 3.59it/s] 35%|███▍ | 128626/371472 [10:13:36<18:21:59, 3.67it/s] 35%|███▍ | 128627/371472 [10:13:36<19:07:17, 3.53it/s] 35%|███▍ | 128628/371472 [10:13:36<20:39:02, 3.27it/s] 35%|███▍ | 128629/371472 [10:13:36<19:30:50, 3.46it/s] 35%|███▍ | 128630/371472 [10:13:37<20:07:28, 3.35it/s] 35%|███▍ | 128631/371472 [10:13:37<19:05:30, 3.53it/s] 35%|███▍ | 128632/371472 [10:13:37<18:20:01, 3.68it/s] 35%|███▍ | 128633/371472 [10:13:38<18:53:55, 3.57it/s] 35%|███▍ | 128634/371472 [10:13:38<18:20:02, 3.68it/s] 35%|███▍ | 128635/371472 [10:13:38<19:02:49, 3.54it/s] 35%|███▍ | 128636/371472 [10:13:38<18:58:03, 3.56it/s] 35%|███▍ | 128637/371472 [10:13:39<18:21:10, 3.68it/s] 35%|███▍ | 128638/371472 [10:13:39<17:47:33, 3.79it/s] 35%|███▍ | 128639/371472 [10:13:39<17:30:58, 3.85it/s] 35%|███▍ | 128640/371472 [10:13:39<18:00:01, 3.75it/s] {'loss': 3.1361, 'learning_rate': 6.886487534745415e-07, 'epoch': 5.54} + 35%|███▍ | 128640/371472 [10:13:39<18:00:01, 3.75it/s] 35%|███▍ | 128641/371472 [10:13:40<17:49:52, 3.78it/s] 35%|███▍ | 128642/371472 [10:13:40<17:21:29, 3.89it/s] 35%|███▍ | 128643/371472 [10:13:40<17:54:13, 3.77it/s] 35%|███▍ | 128644/371472 [10:13:41<18:13:40, 3.70it/s] 35%|███▍ | 128645/371472 [10:13:41<18:27:34, 3.65it/s] 35%|███▍ | 128646/371472 [10:13:41<18:07:06, 3.72it/s] 35%|��██▍ | 128647/371472 [10:13:41<17:40:00, 3.82it/s] 35%|███▍ | 128648/371472 [10:13:42<17:34:07, 3.84it/s] 35%|███▍ | 128649/371472 [10:13:42<17:43:22, 3.81it/s] 35%|███▍ | 128650/371472 [10:13:42<18:22:48, 3.67it/s] 35%|███▍ | 128651/371472 [10:13:42<18:19:54, 3.68it/s] 35%|███▍ | 128652/371472 [10:13:43<18:34:44, 3.63it/s] 35%|███▍ | 128653/371472 [10:13:43<17:53:59, 3.77it/s] 35%|███▍ | 128654/371472 [10:13:43<17:50:24, 3.78it/s] 35%|███▍ | 128655/371472 [10:13:43<17:39:03, 3.82it/s] 35%|███▍ | 128656/371472 [10:13:44<17:26:29, 3.87it/s] 35%|███▍ | 128657/371472 [10:13:44<17:10:49, 3.93it/s] 35%|███▍ | 128658/371472 [10:13:44<17:55:23, 3.76it/s] 35%|███▍ | 128659/371472 [10:13:45<20:15:49, 3.33it/s] 35%|███▍ | 128660/371472 [10:13:45<19:38:50, 3.43it/s] {'loss': 3.2411, 'learning_rate': 6.886002714990627e-07, 'epoch': 5.54} + 35%|███▍ | 128660/371472 [10:13:45<19:38:50, 3.43it/s] 35%|███▍ | 128661/371472 [10:13:45<19:39:20, 3.43it/s] 35%|███▍ | 128662/371472 [10:13:45<19:17:47, 3.50it/s] 35%|███▍ | 128663/371472 [10:13:46<20:23:28, 3.31it/s] 35%|███▍ | 128664/371472 [10:13:46<20:09:15, 3.35it/s] 35%|███▍ | 128665/371472 [10:13:46<19:53:52, 3.39it/s] 35%|███▍ | 128666/371472 [10:13:47<19:23:47, 3.48it/s] 35%|███▍ | 128667/371472 [10:13:47<19:33:11, 3.45it/s] 35%|███▍ | 128668/371472 [10:13:47<19:08:41, 3.52it/s] 35%|███▍ | 128669/371472 [10:13:47<18:43:47, 3.60it/s] 35%|███▍ | 128670/371472 [10:13:48<19:22:42, 3.48it/s] 35%|███▍ | 128671/371472 [10:13:48<20:38:19, 3.27it/s] 35%|███▍ | 128672/371472 [10:13:48<20:34:03, 3.28it/s] 35%|███▍ | 128673/371472 [10:13:49<19:43:42, 3.42it/s] 35%|███▍ | 128674/371472 [10:13:49<19:27:42, 3.47it/s] 35%|███▍ | 128675/371472 [10:13:49<18:56:13, 3.56it/s] 35%|███▍ | 128676/371472 [10:13:49<18:29:02, 3.65it/s] 35%|███▍ | 128677/371472 [10:13:50<18:18:08, 3.68it/s] 35%|███▍ | 128678/371472 [10:13:50<17:50:59, 3.78it/s] 35%|███▍ | 128679/371472 [10:13:50<18:54:06, 3.57it/s] 35%|███▍ | 128680/371472 [10:13:51<18:19:53, 3.68it/s] {'loss': 3.0284, 'learning_rate': 6.885517895235837e-07, 'epoch': 5.54} + 35%|███▍ | 128680/371472 [10:13:51<18:19:53, 3.68it/s] 35%|███▍ | 128681/371472 [10:13:51<18:29:12, 3.65it/s] 35%|███▍ | 128682/371472 [10:13:51<17:53:47, 3.77it/s] 35%|███▍ | 128683/371472 [10:13:51<19:48:51, 3.40it/s] 35%|███▍ | 128684/371472 [10:13:52<19:54:13, 3.39it/s] 35%|███▍ | 128685/371472 [10:13:52<18:59:40, 3.55it/s] 35%|███▍ | 128686/371472 [10:13:52<18:52:31, 3.57it/s] 35%|███▍ | 128687/371472 [10:13:53<18:18:33, 3.68it/s] 35%|███▍ | 128688/371472 [10:13:53<18:52:07, 3.57it/s] 35%|███▍ | 128689/371472 [10:13:53<19:06:58, 3.53it/s] 35%|███▍ | 128690/371472 [10:13:53<18:56:17, 3.56it/s] 35%|███▍ | 128691/371472 [10:13:54<18:23:58, 3.67it/s] 35%|███▍ | 128692/371472 [10:13:54<18:26:29, 3.66it/s] 35%|███▍ | 128693/371472 [10:13:54<17:57:53, 3.75it/s] 35%|███▍ | 128694/371472 [10:13:54<17:42:03, 3.81it/s] 35%|███▍ | 128695/371472 [10:13:55<17:55:55, 3.76it/s] 35%|███▍ | 128696/371472 [10:13:55<17:55:55, 3.76it/s] 35%|███▍ | 128697/371472 [10:13:55<17:34:12, 3.84it/s] 35%|███▍ | 128698/371472 [10:13:55<17:46:45, 3.79it/s] 35%|███▍ | 128699/371472 [10:13:56<19:58:20, 3.38it/s] 35%|███▍ | 128700/371472 [10:13:56<19:05:58, 3.53it/s] {'loss': 3.3229, 'learning_rate': 6.885033075481048e-07, 'epoch': 5.54} + 35%|███▍ | 128700/371472 [10:13:56<19:05:58, 3.53it/s] 35%|███▍ | 128701/371472 [10:13:56<18:35:29, 3.63it/s] 35%|███▍ | 128702/371472 [10:13:57<19:57:10, 3.38it/s] 35%|███▍ | 128703/371472 [10:13:57<20:00:42, 3.37it/s] 35%|███▍ | 128704/371472 [10:13:57<19:16:22, 3.50it/s] 35%|███▍ | 128705/371472 [10:13:58<18:44:25, 3.60it/s] 35%|███▍ | 128706/371472 [10:13:58<18:48:10, 3.59it/s] 35%|███▍ | 128707/371472 [10:13:58<19:14:22, 3.50it/s] 35%|███▍ | 128708/371472 [10:13:58<19:21:42, 3.48it/s] 35%|███▍ | 128709/371472 [10:13:59<18:52:45, 3.57it/s] 35%|███▍ | 128710/371472 [10:13:59<19:02:37, 3.54it/s] 35%|███▍ | 128711/371472 [10:13:59<18:32:21, 3.64it/s] 35%|███▍ | 128712/371472 [10:13:59<18:07:21, 3.72it/s] 35%|███▍ | 128713/371472 [10:14:00<17:41:15, 3.81it/s] 35%|███▍ | 128714/371472 [10:14:00<17:43:24, 3.80it/s] 35%|███▍ | 128715/371472 [10:14:00<17:32:36, 3.84it/s] 35%|███▍ | 128716/371472 [10:14:00<17:43:27, 3.80it/s] 35%|███▍ | 128717/371472 [10:14:01<17:55:57, 3.76it/s] 35%|███▍ | 128718/371472 [10:14:01<18:32:07, 3.64it/s] 35%|███▍ | 128719/371472 [10:14:01<18:16:49, 3.69it/s] 35%|███▍ | 128720/371472 [10:14:02<18:46:15, 3.59it/s] {'loss': 3.2227, 'learning_rate': 6.88454825572626e-07, 'epoch': 5.54} + 35%|███▍ | 128720/371472 [10:14:02<18:46:15, 3.59it/s] 35%|███▍ | 128721/371472 [10:14:02<18:24:06, 3.66it/s] 35%|███▍ | 128722/371472 [10:14:02<18:15:26, 3.69it/s] 35%|███▍ | 128723/371472 [10:14:02<18:36:55, 3.62it/s] 35%|███▍ | 128724/371472 [10:14:03<18:26:57, 3.65it/s] 35%|███▍ | 128725/371472 [10:14:03<17:55:56, 3.76it/s] 35%|███▍ | 128726/371472 [10:14:03<17:47:06, 3.79it/s] 35%|███▍ | 128727/371472 [10:14:03<17:48:30, 3.79it/s] 35%|███▍ | 128728/371472 [10:14:04<17:39:50, 3.82it/s] 35%|███▍ | 128729/371472 [10:14:04<17:52:07, 3.77it/s] 35%|███▍ | 128730/371472 [10:14:04<19:26:47, 3.47it/s] 35%|███▍ | 128731/371472 [10:14:05<22:10:00, 3.04it/s] 35%|███▍ | 128732/371472 [10:14:05<21:48:03, 3.09it/s] 35%|███▍ | 128733/371472 [10:14:05<20:09:15, 3.35it/s] 35%|███▍ | 128734/371472 [10:14:06<19:16:01, 3.50it/s] 35%|███▍ | 128735/371472 [10:14:06<19:32:41, 3.45it/s] 35%|███▍ | 128736/371472 [10:14:06<18:36:08, 3.62it/s] 35%|███▍ | 128737/371472 [10:14:06<19:53:49, 3.39it/s] 35%|███▍ | 128738/371472 [10:14:07<19:08:41, 3.52it/s] 35%|███▍ | 128739/371472 [10:14:07<18:37:06, 3.62it/s] 35%|███▍ | 128740/371472 [10:14:07<20:30:03, 3.29it/s] {'loss': 3.2449, 'learning_rate': 6.884063435971471e-07, 'epoch': 5.55} + 35%|███▍ | 128740/371472 [10:14:07<20:30:03, 3.29it/s] 35%|███▍ | 128741/371472 [10:14:08<19:28:14, 3.46it/s] 35%|███▍ | 128742/371472 [10:14:08<20:51:28, 3.23it/s] 35%|███▍ | 128743/371472 [10:14:08<20:07:09, 3.35it/s] 35%|███▍ | 128744/371472 [10:14:08<19:29:02, 3.46it/s] 35%|███▍ | 128745/371472 [10:14:09<19:12:13, 3.51it/s] 35%|███▍ | 128746/371472 [10:14:09<18:40:20, 3.61it/s] 35%|███▍ | 128747/371472 [10:14:09<18:48:38, 3.58it/s] 35%|███▍ | 128748/371472 [10:14:10<19:00:22, 3.55it/s] 35%|███▍ | 128749/371472 [10:14:10<18:38:20, 3.62it/s] 35%|███▍ | 128750/371472 [10:14:10<18:24:29, 3.66it/s] 35%|███▍ | 128751/371472 [10:14:10<18:33:12, 3.63it/s] 35%|███▍ | 128752/371472 [10:14:11<18:33:24, 3.63it/s] 35%|███▍ | 128753/371472 [10:14:11<18:41:17, 3.61it/s] 35%|███▍ | 128754/371472 [10:14:11<20:26:09, 3.30it/s] 35%|███▍ | 128755/371472 [10:14:12<19:08:19, 3.52it/s] 35%|███▍ | 128756/371472 [10:14:12<20:01:27, 3.37it/s] 35%|███▍ | 128757/371472 [10:14:12<19:11:24, 3.51it/s] 35%|███▍ | 128758/371472 [10:14:12<18:51:25, 3.58it/s] 35%|███▍ | 128759/371472 [10:14:13<18:32:17, 3.64it/s] 35%|███▍ | 128760/371472 [10:14:13<18:54:52, 3.56it/s] {'loss': 3.2154, 'learning_rate': 6.883578616216682e-07, 'epoch': 5.55} + 35%|███▍ | 128760/371472 [10:14:13<18:54:52, 3.56it/s] 35%|███▍ | 128761/371472 [10:14:13<18:16:30, 3.69it/s] 35%|███▍ | 128762/371472 [10:14:13<18:22:51, 3.67it/s] 35%|███▍ | 128763/371472 [10:14:14<18:49:58, 3.58it/s] 35%|███▍ | 128764/371472 [10:14:14<19:28:12, 3.46it/s] 35%|███▍ | 128765/371472 [10:14:14<19:24:59, 3.47it/s] 35%|███▍ | 128766/371472 [10:14:15<19:21:09, 3.48it/s] 35%|███▍ | 128767/371472 [10:14:15<19:21:23, 3.48it/s] 35%|███▍ | 128768/371472 [10:14:15<18:29:34, 3.65it/s] 35%|███▍ | 128769/371472 [10:14:15<18:12:01, 3.70it/s] 35%|███▍ | 128770/371472 [10:14:16<19:25:04, 3.47it/s] 35%|███▍ | 128771/371472 [10:14:16<19:29:23, 3.46it/s] 35%|███▍ | 128772/371472 [10:14:16<18:58:11, 3.55it/s] 35%|███▍ | 128773/371472 [10:14:17<18:56:58, 3.56it/s] 35%|███▍ | 128774/371472 [10:14:17<19:07:22, 3.53it/s] 35%|███▍ | 128775/371472 [10:14:17<18:06:43, 3.72it/s] 35%|███▍ | 128776/371472 [10:14:17<18:22:40, 3.67it/s] 35%|███▍ | 128777/371472 [10:14:18<19:35:34, 3.44it/s] 35%|███▍ | 128778/371472 [10:14:18<20:40:44, 3.26it/s] 35%|███▍ | 128779/371472 [10:14:18<20:46:07, 3.25it/s] 35%|███▍ | 128780/371472 [10:14:19<20:36:31, 3.27it/s] {'loss': 3.2846, 'learning_rate': 6.883093796461892e-07, 'epoch': 5.55} + 35%|███▍ | 128780/371472 [10:14:19<20:36:31, 3.27it/s] 35%|███▍ | 128781/371472 [10:14:19<20:23:45, 3.31it/s] 35%|███▍ | 128782/371472 [10:14:19<20:38:14, 3.27it/s] 35%|███▍ | 128783/371472 [10:14:20<21:11:22, 3.18it/s] 35%|███▍ | 128784/371472 [10:14:20<20:07:38, 3.35it/s] 35%|███▍ | 128785/371472 [10:14:20<19:14:18, 3.50it/s] 35%|███▍ | 128786/371472 [10:14:20<19:02:40, 3.54it/s] 35%|███▍ | 128787/371472 [10:14:21<18:57:38, 3.56it/s] 35%|███▍ | 128788/371472 [10:14:21<18:48:20, 3.58it/s] 35%|███▍ | 128789/371472 [10:14:21<18:55:05, 3.56it/s] 35%|███▍ | 128790/371472 [10:14:22<19:21:46, 3.48it/s] 35%|███▍ | 128791/371472 [10:14:22<18:43:43, 3.60it/s] 35%|███▍ | 128792/371472 [10:14:22<18:56:19, 3.56it/s] 35%|███▍ | 128793/371472 [10:14:22<19:06:47, 3.53it/s] 35%|███▍ | 128794/371472 [10:14:23<21:00:23, 3.21it/s] 35%|███▍ | 128795/371472 [10:14:23<19:49:08, 3.40it/s] 35%|███▍ | 128796/371472 [10:14:23<19:44:10, 3.42it/s] 35%|███▍ | 128797/371472 [10:14:24<19:49:18, 3.40it/s] 35%|███▍ | 128798/371472 [10:14:24<18:48:20, 3.58it/s] 35%|███▍ | 128799/371472 [10:14:24<18:27:47, 3.65it/s] 35%|███▍ | 128800/371472 [10:14:24<19:22:51, 3.48it/s] {'loss': 3.1468, 'learning_rate': 6.882608976707104e-07, 'epoch': 5.55} + 35%|███▍ | 128800/371472 [10:14:24<19:22:51, 3.48it/s] 35%|███▍ | 128801/371472 [10:14:25<18:59:35, 3.55it/s] 35%|███▍ | 128802/371472 [10:14:25<19:37:28, 3.43it/s] 35%|███▍ | 128803/371472 [10:14:25<19:13:29, 3.51it/s] 35%|███▍ | 128804/371472 [10:14:26<19:06:57, 3.53it/s] 35%|███▍ | 128805/371472 [10:14:26<20:20:17, 3.31it/s] 35%|███▍ | 128806/371472 [10:14:26<19:35:12, 3.44it/s] 35%|███▍ | 128807/371472 [10:14:27<19:55:29, 3.38it/s] 35%|███▍ | 128808/371472 [10:14:27<21:23:05, 3.15it/s] 35%|███▍ | 128809/371472 [10:14:27<19:54:34, 3.39it/s] 35%|███▍ | 128810/371472 [10:14:27<19:05:06, 3.53it/s] 35%|███▍ | 128811/371472 [10:14:28<19:12:11, 3.51it/s] 35%|███▍ | 128812/371472 [10:14:28<19:04:04, 3.54it/s] 35%|███▍ | 128813/371472 [10:14:28<18:24:28, 3.66it/s] 35%|███▍ | 128814/371472 [10:14:28<18:34:17, 3.63it/s] 35%|███▍ | 128815/371472 [10:14:29<18:46:09, 3.59it/s] 35%|███▍ | 128816/371472 [10:14:29<18:07:09, 3.72it/s] 35%|███▍ | 128817/371472 [10:14:29<18:39:41, 3.61it/s] 35%|███▍ | 128818/371472 [10:14:30<18:05:32, 3.73it/s] 35%|███▍ | 128819/371472 [10:14:30<19:12:44, 3.51it/s] 35%|███▍ | 128820/371472 [10:14:30<18:29:30, 3.65it/s] {'loss': 3.1386, 'learning_rate': 6.882124156952315e-07, 'epoch': 5.55} + 35%|███▍ | 128820/371472 [10:14:30<18:29:30, 3.65it/s] 35%|███▍ | 128821/371472 [10:14:30<18:38:21, 3.62it/s] 35%|███▍ | 128822/371472 [10:14:31<18:59:43, 3.55it/s] 35%|███▍ | 128823/371472 [10:14:31<18:56:24, 3.56it/s] 35%|███▍ | 128824/371472 [10:14:31<19:49:20, 3.40it/s] 35%|███▍ | 128825/371472 [10:14:32<19:45:41, 3.41it/s] 35%|███▍ | 128826/371472 [10:14:32<18:49:49, 3.58it/s] 35%|███▍ | 128827/371472 [10:14:32<18:13:40, 3.70it/s] 35%|███▍ | 128828/371472 [10:14:32<18:48:50, 3.58it/s] 35%|███▍ | 128829/371472 [10:14:33<18:10:19, 3.71it/s] 35%|███▍ | 128830/371472 [10:14:33<17:43:05, 3.80it/s] 35%|███▍ | 128831/371472 [10:14:33<18:30:47, 3.64it/s] 35%|███▍ | 128832/371472 [10:14:33<18:22:11, 3.67it/s] 35%|███▍ | 128833/371472 [10:14:34<19:48:43, 3.40it/s] 35%|███▍ | 128834/371472 [10:14:34<18:39:27, 3.61it/s] 35%|███▍ | 128835/371472 [10:14:34<20:20:58, 3.31it/s] 35%|███▍ | 128836/371472 [10:14:35<20:26:45, 3.30it/s] 35%|███▍ | 128837/371472 [10:14:35<20:55:29, 3.22it/s] 35%|███▍ | 128838/371472 [10:14:35<19:55:20, 3.38it/s] 35%|███▍ | 128839/371472 [10:14:36<20:04:12, 3.36it/s] 35%|███▍ | 128840/371472 [10:14:36<20:04:45, 3.36it/s] {'loss': 3.0341, 'learning_rate': 6.881639337197526e-07, 'epoch': 5.55} + 35%|███▍ | 128840/371472 [10:14:36<20:04:45, 3.36it/s] 35%|███▍ | 128841/371472 [10:14:36<20:48:03, 3.24it/s] 35%|███▍ | 128842/371472 [10:14:37<21:45:33, 3.10it/s] 35%|███▍ | 128843/371472 [10:14:37<24:26:30, 2.76it/s] 35%|███▍ | 128844/371472 [10:14:37<22:04:37, 3.05it/s] 35%|███▍ | 128845/371472 [10:14:38<20:25:40, 3.30it/s] 35%|███▍ | 128846/371472 [10:14:38<19:37:54, 3.43it/s] 35%|███▍ | 128847/371472 [10:14:38<21:30:06, 3.13it/s] 35%|███▍ | 128848/371472 [10:14:38<20:37:30, 3.27it/s] 35%|███▍ | 128849/371472 [10:14:39<20:05:26, 3.35it/s] 35%|███▍ | 128850/371472 [10:14:39<19:45:54, 3.41it/s] 35%|███▍ | 128851/371472 [10:14:39<19:17:27, 3.49it/s] 35%|███▍ | 128852/371472 [10:14:40<18:58:09, 3.55it/s] 35%|███▍ | 128853/371472 [10:14:40<18:39:49, 3.61it/s] 35%|███▍ | 128854/371472 [10:14:40<18:47:59, 3.58it/s] 35%|███▍ | 128855/371472 [10:14:40<18:16:18, 3.69it/s] 35%|███▍ | 128856/371472 [10:14:41<18:43:21, 3.60it/s] 35%|███▍ | 128857/371472 [10:14:41<18:35:00, 3.63it/s] 35%|███▍ | 128858/371472 [10:14:41<18:25:21, 3.66it/s] 35%|███▍ | 128859/371472 [10:14:42<19:09:01, 3.52it/s] 35%|███▍ | 128860/371472 [10:14:42<19:17:37, 3.49it/s] {'loss': 3.1763, 'learning_rate': 6.881154517442737e-07, 'epoch': 5.55} + 35%|███▍ | 128860/371472 [10:14:42<19:17:37, 3.49it/s] 35%|███▍ | 128861/371472 [10:14:42<19:49:46, 3.40it/s] 35%|███▍ | 128862/371472 [10:14:43<22:17:18, 3.02it/s] 35%|███▍ | 128863/371472 [10:14:43<21:44:45, 3.10it/s] 35%|███▍ | 128864/371472 [10:14:43<21:13:11, 3.18it/s] 35%|███▍ | 128865/371472 [10:14:43<20:09:27, 3.34it/s] 35%|███▍ | 128866/371472 [10:14:44<20:49:23, 3.24it/s] 35%|███▍ | 128867/371472 [10:14:44<19:55:46, 3.38it/s] 35%|███▍ | 128868/371472 [10:14:44<19:07:40, 3.52it/s] 35%|███▍ | 128869/371472 [10:14:44<18:19:23, 3.68it/s] 35%|███▍ | 128870/371472 [10:14:45<19:36:18, 3.44it/s] 35%|███▍ | 128871/371472 [10:14:45<18:28:13, 3.65it/s] 35%|███▍ | 128872/371472 [10:14:45<18:07:18, 3.72it/s] 35%|███▍ | 128873/371472 [10:14:46<18:02:08, 3.74it/s] 35%|███▍ | 128874/371472 [10:14:46<19:37:52, 3.43it/s] 35%|███▍ | 128875/371472 [10:14:46<18:54:51, 3.56it/s] 35%|███▍ | 128876/371472 [10:14:46<19:35:43, 3.44it/s] 35%|███▍ | 128877/371472 [10:14:47<19:05:21, 3.53it/s] 35%|███▍ | 128878/371472 [10:14:47<18:22:49, 3.67it/s] 35%|███▍ | 128879/371472 [10:14:47<17:56:07, 3.76it/s] 35%|███▍ | 128880/371472 [10:14:48<18:16:48, 3.69it/s] {'loss': 3.107, 'learning_rate': 6.880669697687949e-07, 'epoch': 5.55} + 35%|███▍ | 128880/371472 [10:14:48<18:16:48, 3.69it/s] 35%|███▍ | 128881/371472 [10:14:48<17:52:18, 3.77it/s] 35%|███▍ | 128882/371472 [10:14:48<18:59:05, 3.55it/s] 35%|███▍ | 128883/371472 [10:14:48<19:16:08, 3.50it/s] 35%|███▍ | 128884/371472 [10:14:49<18:37:03, 3.62it/s] 35%|███▍ | 128885/371472 [10:14:49<18:32:33, 3.63it/s] 35%|███▍ | 128886/371472 [10:14:49<17:52:44, 3.77it/s] 35%|███▍ | 128887/371472 [10:14:49<17:48:11, 3.78it/s] 35%|███▍ | 128888/371472 [10:14:50<19:11:39, 3.51it/s] 35%|███▍ | 128889/371472 [10:14:50<18:57:38, 3.55it/s] 35%|███▍ | 128890/371472 [10:14:50<18:52:40, 3.57it/s] 35%|███▍ | 128891/371472 [10:14:51<18:21:28, 3.67it/s] 35%|███▍ | 128892/371472 [10:14:51<18:25:45, 3.66it/s] 35%|███▍ | 128893/371472 [10:14:51<19:28:50, 3.46it/s] 35%|███▍ | 128894/371472 [10:14:51<19:19:02, 3.49it/s] 35%|███▍ | 128895/371472 [10:14:52<18:25:54, 3.66it/s] 35%|███▍ | 128896/371472 [10:14:52<18:31:28, 3.64it/s] 35%|███▍ | 128897/371472 [10:14:52<19:57:25, 3.38it/s] 35%|███▍ | 128898/371472 [10:14:53<19:39:23, 3.43it/s] 35%|███▍ | 128899/371472 [10:14:53<21:21:24, 3.16it/s] 35%|███▍ | 128900/371472 [10:14:53<19:54:17, 3.39it/s] {'loss': 3.1413, 'learning_rate': 6.880184877933159e-07, 'epoch': 5.55} + 35%|███▍ | 128900/371472 [10:14:53<19:54:17, 3.39it/s] 35%|███▍ | 128901/371472 [10:14:53<19:08:20, 3.52it/s] 35%|███▍ | 128902/371472 [10:14:54<18:11:01, 3.71it/s] 35%|███▍ | 128903/371472 [10:14:54<17:46:04, 3.79it/s] 35%|███▍ | 128904/371472 [10:14:54<18:41:38, 3.60it/s] 35%|███▍ | 128905/371472 [10:14:55<18:25:34, 3.66it/s] 35%|███▍ | 128906/371472 [10:14:55<19:14:31, 3.50it/s] 35%|███▍ | 128907/371472 [10:14:55<18:54:46, 3.56it/s] 35%|███▍ | 128908/371472 [10:14:55<19:01:22, 3.54it/s] 35%|███▍ | 128909/371472 [10:14:56<18:41:31, 3.60it/s] 35%|███▍ | 128910/371472 [10:14:56<18:07:55, 3.72it/s] 35%|███▍ | 128911/371472 [10:14:56<17:54:44, 3.76it/s] 35%|███▍ | 128912/371472 [10:14:56<17:39:06, 3.82it/s] 35%|███▍ | 128913/371472 [10:14:57<17:53:35, 3.77it/s] 35%|███▍ | 128914/371472 [10:14:57<17:20:45, 3.88it/s] 35%|███▍ | 128915/371472 [10:14:57<17:42:21, 3.81it/s] 35%|███▍ | 128916/371472 [10:14:58<19:50:02, 3.40it/s] 35%|███▍ | 128917/371472 [10:14:58<19:09:52, 3.52it/s] 35%|███▍ | 128918/371472 [10:14:58<19:05:11, 3.53it/s] 35%|███▍ | 128919/371472 [10:14:58<19:03:00, 3.54it/s] 35%|███▍ | 128920/371472 [10:14:59<19:21:46, 3.48it/s] {'loss': 3.0938, 'learning_rate': 6.879700058178369e-07, 'epoch': 5.55} + 35%|███▍ | 128920/371472 [10:14:59<19:21:46, 3.48it/s] 35%|███▍ | 128921/371472 [10:14:59<18:31:41, 3.64it/s] 35%|███▍ | 128922/371472 [10:14:59<18:33:52, 3.63it/s] 35%|███▍ | 128923/371472 [10:15:00<18:18:57, 3.68it/s] 35%|███▍ | 128924/371472 [10:15:00<17:38:58, 3.82it/s] 35%|███▍ | 128925/371472 [10:15:00<17:57:49, 3.75it/s] 35%|███▍ | 128926/371472 [10:15:00<18:04:23, 3.73it/s] 35%|███▍ | 128927/371472 [10:15:01<18:38:48, 3.61it/s] 35%|███▍ | 128928/371472 [10:15:01<21:37:03, 3.12it/s] 35%|███▍ | 128929/371472 [10:15:01<20:39:49, 3.26it/s] 35%|███▍ | 128930/371472 [10:15:02<20:05:11, 3.35it/s] 35%|███▍ | 128931/371472 [10:15:02<18:53:22, 3.57it/s] 35%|███▍ | 128932/371472 [10:15:02<18:14:38, 3.69it/s] 35%|███▍ | 128933/371472 [10:15:02<17:48:11, 3.78it/s] 35%|███▍ | 128934/371472 [10:15:03<18:19:28, 3.68it/s] 35%|███▍ | 128935/371472 [10:15:03<18:32:16, 3.63it/s] 35%|███▍ | 128936/371472 [10:15:03<17:55:39, 3.76it/s] 35%|███▍ | 128937/371472 [10:15:03<18:30:32, 3.64it/s] 35%|███▍ | 128938/371472 [10:15:04<17:58:55, 3.75it/s] 35%|███▍ | 128939/371472 [10:15:04<18:35:44, 3.62it/s] 35%|███▍ | 128940/371472 [10:15:04<19:24:46, 3.47it/s] {'loss': 3.1517, 'learning_rate': 6.879215238423581e-07, 'epoch': 5.55} + 35%|███▍ | 128940/371472 [10:15:04<19:24:46, 3.47it/s] 35%|███▍ | 128941/371472 [10:15:05<18:49:24, 3.58it/s] 35%|███▍ | 128942/371472 [10:15:05<18:33:14, 3.63it/s] 35%|███▍ | 128943/371472 [10:15:05<19:41:38, 3.42it/s] 35%|███▍ | 128944/371472 [10:15:05<19:00:47, 3.54it/s] 35%|███▍ | 128945/371472 [10:15:06<19:26:23, 3.47it/s] 35%|███▍ | 128946/371472 [10:15:06<19:40:39, 3.42it/s] 35%|███▍ | 128947/371472 [10:15:06<19:18:40, 3.49it/s] 35%|███▍ | 128948/371472 [10:15:07<19:29:09, 3.46it/s] 35%|███▍ | 128949/371472 [10:15:07<20:24:43, 3.30it/s] 35%|███▍ | 128950/371472 [10:15:07<20:15:03, 3.33it/s] 35%|███▍ | 128951/371472 [10:15:07<19:52:07, 3.39it/s] 35%|███▍ | 128952/371472 [10:15:08<20:29:14, 3.29it/s] 35%|███▍ | 128953/371472 [10:15:08<20:11:34, 3.34it/s] 35%|███▍ | 128954/371472 [10:15:08<19:58:17, 3.37it/s] 35%|███▍ | 128955/371472 [10:15:09<19:21:39, 3.48it/s] 35%|███▍ | 128956/371472 [10:15:09<18:50:14, 3.58it/s] 35%|███▍ | 128957/371472 [10:15:09<18:31:23, 3.64it/s] 35%|███▍ | 128958/371472 [10:15:09<19:00:39, 3.54it/s] 35%|███▍ | 128959/371472 [10:15:10<20:11:18, 3.34it/s] 35%|███▍ | 128960/371472 [10:15:10<20:26:07, 3.30it/s] {'loss': 3.1688, 'learning_rate': 6.878730418668793e-07, 'epoch': 5.55} + 35%|███▍ | 128960/371472 [10:15:10<20:26:07, 3.30it/s] 35%|███▍ | 128961/371472 [10:15:10<20:00:30, 3.37it/s] 35%|███▍ | 128962/371472 [10:15:11<19:13:14, 3.50it/s] 35%|███▍ | 128963/371472 [10:15:11<18:59:12, 3.55it/s] 35%|███▍ | 128964/371472 [10:15:11<18:24:08, 3.66it/s] 35%|███▍ | 128965/371472 [10:15:11<17:48:48, 3.78it/s] 35%|███▍ | 128966/371472 [10:15:12<17:58:21, 3.75it/s] 35%|███▍ | 128967/371472 [10:15:12<18:11:09, 3.70it/s] 35%|███▍ | 128968/371472 [10:15:12<18:00:44, 3.74it/s] 35%|███▍ | 128969/371472 [10:15:13<17:54:44, 3.76it/s] 35%|███▍ | 128970/371472 [10:15:13<18:09:15, 3.71it/s] 35%|███▍ | 128971/371472 [10:15:13<19:34:14, 3.44it/s] 35%|███▍ | 128972/371472 [10:15:13<19:33:54, 3.44it/s] 35%|███▍ | 128973/371472 [10:15:14<19:21:02, 3.48it/s] 35%|███▍ | 128974/371472 [10:15:14<19:01:08, 3.54it/s] 35%|███▍ | 128975/371472 [10:15:14<18:15:25, 3.69it/s] 35%|███▍ | 128976/371472 [10:15:14<18:18:26, 3.68it/s] 35%|███▍ | 128977/371472 [10:15:15<17:55:06, 3.76it/s] 35%|███▍ | 128978/371472 [10:15:15<17:26:47, 3.86it/s] 35%|███▍ | 128979/371472 [10:15:15<17:09:41, 3.93it/s] 35%|███▍ | 128980/371472 [10:15:16<17:31:42, 3.84it/s] {'loss': 3.2693, 'learning_rate': 6.878245598914005e-07, 'epoch': 5.56} + 35%|███▍ | 128980/371472 [10:15:16<17:31:42, 3.84it/s] 35%|███▍ | 128981/371472 [10:15:16<17:54:46, 3.76it/s] 35%|███▍ | 128982/371472 [10:15:16<17:48:44, 3.78it/s] 35%|███▍ | 128983/371472 [10:15:16<18:26:29, 3.65it/s] 35%|███▍ | 128984/371472 [10:15:17<18:43:29, 3.60it/s] 35%|███▍ | 128985/371472 [10:15:17<18:19:27, 3.68it/s] 35%|███▍ | 128986/371472 [10:15:17<17:43:44, 3.80it/s] 35%|███▍ | 128987/371472 [10:15:17<18:50:12, 3.58it/s] 35%|███▍ | 128988/371472 [10:15:18<18:24:13, 3.66it/s] 35%|███▍ | 128989/371472 [10:15:18<20:48:35, 3.24it/s] 35%|███▍ | 128990/371472 [10:15:18<21:07:15, 3.19it/s] 35%|███▍ | 128991/371472 [10:15:19<22:02:49, 3.06it/s] 35%|███▍ | 128992/371472 [10:15:19<20:20:10, 3.31it/s] 35%|███▍ | 128993/371472 [10:15:19<19:27:06, 3.46it/s] 35%|███▍ | 128994/371472 [10:15:20<18:45:30, 3.59it/s] 35%|███▍ | 128995/371472 [10:15:20<18:31:33, 3.64it/s] 35%|███▍ | 128996/371472 [10:15:20<19:02:59, 3.54it/s] 35%|███▍ | 128997/371472 [10:15:20<19:29:39, 3.46it/s] 35%|███▍ | 128998/371472 [10:15:21<19:32:12, 3.45it/s] 35%|███▍ | 128999/371472 [10:15:21<21:22:23, 3.15it/s] 35%|███▍ | 129000/371472 [10:15:21<21:31:35, 3.13it/s] {'loss': 3.0736, 'learning_rate': 6.877760779159214e-07, 'epoch': 5.56} + 35%|███▍ | 129000/371472 [10:15:21<21:31:35, 3.13it/s] 35%|███▍ | 129001/371472 [10:15:22<19:48:26, 3.40it/s] 35%|███▍ | 129002/371472 [10:15:22<19:55:16, 3.38it/s] 35%|███▍ | 129003/371472 [10:15:22<18:52:59, 3.57it/s] 35%|███▍ | 129004/371472 [10:15:22<18:03:02, 3.73it/s] 35%|███▍ | 129005/371472 [10:15:23<18:34:20, 3.63it/s] 35%|███▍ | 129006/371472 [10:15:23<19:33:11, 3.44it/s] 35%|███▍ | 129007/371472 [10:15:23<20:10:03, 3.34it/s] 35%|███▍ | 129008/371472 [10:15:24<19:33:30, 3.44it/s] 35%|███▍ | 129009/371472 [10:15:24<18:48:18, 3.58it/s] 35%|███▍ | 129010/371472 [10:15:24<18:47:31, 3.58it/s] 35%|███▍ | 129011/371472 [10:15:24<18:42:16, 3.60it/s] 35%|███▍ | 129012/371472 [10:15:25<19:29:42, 3.45it/s] 35%|███▍ | 129013/371472 [10:15:25<19:02:57, 3.54it/s] 35%|███▍ | 129014/371472 [10:15:25<18:22:13, 3.67it/s] 35%|███▍ | 129015/371472 [10:15:26<17:58:15, 3.75it/s] 35%|███▍ | 129016/371472 [10:15:26<17:39:54, 3.81it/s] 35%|███▍ | 129017/371472 [10:15:26<17:46:54, 3.79it/s] 35%|███▍ | 129018/371472 [10:15:26<18:03:20, 3.73it/s] 35%|███▍ | 129019/371472 [10:15:27<19:53:34, 3.39it/s] 35%|███▍ | 129020/371472 [10:15:27<19:47:00, 3.40it/s] {'loss': 3.1875, 'learning_rate': 6.877275959404425e-07, 'epoch': 5.56} + 35%|███▍ | 129020/371472 [10:15:27<19:47:00, 3.40it/s] 35%|███▍ | 129021/371472 [10:15:27<18:57:09, 3.55it/s] 35%|███▍ | 129022/371472 [10:15:27<18:46:42, 3.59it/s] 35%|███▍ | 129023/371472 [10:15:28<19:33:45, 3.44it/s] 35%|███▍ | 129024/371472 [10:15:28<20:16:42, 3.32it/s] 35%|███▍ | 129025/371472 [10:15:29<22:29:48, 2.99it/s] 35%|███▍ | 129026/371472 [10:15:29<20:44:26, 3.25it/s] 35%|███▍ | 129027/371472 [10:15:29<19:45:56, 3.41it/s] 35%|███▍ | 129028/371472 [10:15:29<19:03:51, 3.53it/s] 35%|███▍ | 129029/371472 [10:15:30<19:24:30, 3.47it/s] 35%|███▍ | 129030/371472 [10:15:30<20:44:03, 3.25it/s] 35%|███▍ | 129031/371472 [10:15:30<20:15:32, 3.32it/s] 35%|███▍ | 129032/371472 [10:15:31<21:33:19, 3.12it/s] 35%|███▍ | 129033/371472 [10:15:31<20:50:41, 3.23it/s] 35%|███▍ | 129034/371472 [10:15:31<19:49:14, 3.40it/s] 35%|███▍ | 129035/371472 [10:15:31<19:26:58, 3.46it/s] 35%|███▍ | 129036/371472 [10:15:32<19:15:46, 3.50it/s] 35%|███▍ | 129037/371472 [10:15:32<18:35:12, 3.62it/s] 35%|███▍ | 129038/371472 [10:15:32<18:05:21, 3.72it/s] 35%|███▍ | 129039/371472 [10:15:32<17:56:12, 3.75it/s] 35%|███▍ | 129040/371472 [10:15:33<18:11:50, 3.70it/s] {'loss': 3.2301, 'learning_rate': 6.876791139649637e-07, 'epoch': 5.56} + 35%|███▍ | 129040/371472 [10:15:33<18:11:50, 3.70it/s] 35%|███▍ | 129041/371472 [10:15:33<18:31:10, 3.64it/s] 35%|███▍ | 129042/371472 [10:15:33<20:01:43, 3.36it/s] 35%|███▍ | 129043/371472 [10:15:34<18:54:12, 3.56it/s] 35%|███▍ | 129044/371472 [10:15:34<18:46:52, 3.59it/s] 35%|███▍ | 129045/371472 [10:15:34<18:29:41, 3.64it/s] 35%|███▍ | 129046/371472 [10:15:35<19:32:21, 3.45it/s] 35%|███▍ | 129047/371472 [10:15:35<19:13:06, 3.50it/s] 35%|███▍ | 129048/371472 [10:15:35<19:24:16, 3.47it/s] 35%|███▍ | 129049/371472 [10:15:35<20:41:24, 3.25it/s] 35%|███▍ | 129050/371472 [10:15:36<20:14:06, 3.33it/s] 35%|███▍ | 129051/371472 [10:15:36<20:24:17, 3.30it/s] 35%|███▍ | 129052/371472 [10:15:36<19:05:36, 3.53it/s] 35%|███▍ | 129053/371472 [10:15:37<19:05:24, 3.53it/s] 35%|███▍ | 129054/371472 [10:15:37<20:04:42, 3.35it/s] 35%|███▍ | 129055/371472 [10:15:37<19:19:05, 3.49it/s] 35%|███▍ | 129056/371472 [10:15:37<19:10:35, 3.51it/s] 35%|███▍ | 129057/371472 [10:15:38<19:36:19, 3.43it/s] 35%|███▍ | 129058/371472 [10:15:38<19:15:52, 3.50it/s] 35%|███▍ | 129059/371472 [10:15:38<19:52:16, 3.39it/s] 35%|███▍ | 129060/371472 [10:15:39<18:51:29, 3.57it/s] {'loss': 3.2131, 'learning_rate': 6.876306319894848e-07, 'epoch': 5.56} + 35%|███▍ | 129060/371472 [10:15:39<18:51:29, 3.57it/s] 35%|███▍ | 129061/371472 [10:15:39<20:21:56, 3.31it/s] 35%|███▍ | 129062/371472 [10:15:39<21:18:25, 3.16it/s] 35%|███▍ | 129063/371472 [10:15:40<20:16:12, 3.32it/s] 35%|███▍ | 129064/371472 [10:15:40<19:59:11, 3.37it/s] 35%|███▍ | 129065/371472 [10:15:40<19:27:09, 3.46it/s] 35%|███▍ | 129066/371472 [10:15:40<18:28:14, 3.65it/s] 35%|███▍ | 129067/371472 [10:15:41<19:09:16, 3.52it/s] 35%|███▍ | 129068/371472 [10:15:41<18:37:49, 3.61it/s] 35%|███▍ | 129069/371472 [10:15:41<18:44:18, 3.59it/s] 35%|███▍ | 129070/371472 [10:15:41<18:11:16, 3.70it/s] 35%|███▍ | 129071/371472 [10:15:42<18:09:42, 3.71it/s] 35%|███▍ | 129072/371472 [10:15:42<17:39:01, 3.81it/s] 35%|███▍ | 129073/371472 [10:15:42<18:33:53, 3.63it/s] 35%|███▍ | 129074/371472 [10:15:43<19:01:52, 3.54it/s] 35%|███▍ | 129075/371472 [10:15:43<18:18:58, 3.68it/s] 35%|███▍ | 129076/371472 [10:15:43<17:40:49, 3.81it/s] 35%|███▍ | 129077/371472 [10:15:43<18:52:51, 3.57it/s] 35%|███▍ | 129078/371472 [10:15:44<18:40:38, 3.60it/s] 35%|███▍ | 129079/371472 [10:15:44<18:20:19, 3.67it/s] 35%|███▍ | 129080/371472 [10:15:44<17:58:53, 3.74it/s] {'loss': 3.3486, 'learning_rate': 6.875821500140058e-07, 'epoch': 5.56} + 35%|███▍ | 129080/371472 [10:15:44<17:58:53, 3.74it/s] 35%|███▍ | 129081/371472 [10:15:44<18:14:07, 3.69it/s] 35%|███▍ | 129082/371472 [10:15:45<18:01:25, 3.74it/s] 35%|███▍ | 129083/371472 [10:15:45<18:01:56, 3.73it/s] 35%|███▍ | 129084/371472 [10:15:45<18:17:09, 3.68it/s] 35%|███▍ | 129085/371472 [10:15:45<17:59:24, 3.74it/s] 35%|███▍ | 129086/371472 [10:15:46<18:01:09, 3.74it/s] 35%|███▍ | 129087/371472 [10:15:46<17:51:11, 3.77it/s] 35%|███▍ | 129088/371472 [10:15:46<17:10:26, 3.92it/s] 35%|███▍ | 129089/371472 [10:15:47<17:39:00, 3.81it/s] 35%|███▍ | 129090/371472 [10:15:47<17:46:02, 3.79it/s] 35%|███▍ | 129091/371472 [10:15:47<18:15:24, 3.69it/s] 35%|███▍ | 129092/371472 [10:15:47<17:45:16, 3.79it/s] 35%|███▍ | 129093/371472 [10:15:48<17:30:19, 3.85it/s] 35%|███▍ | 129094/371472 [10:15:48<17:20:55, 3.88it/s] 35%|███▍ | 129095/371472 [10:15:48<17:31:07, 3.84it/s] 35%|███▍ | 129096/371472 [10:15:48<18:01:49, 3.73it/s] 35%|███▍ | 129097/371472 [10:15:49<19:09:43, 3.51it/s] 35%|███▍ | 129098/371472 [10:15:49<19:03:52, 3.53it/s] 35%|███▍ | 129099/371472 [10:15:49<18:33:48, 3.63it/s] 35%|███▍ | 129100/371472 [10:15:49<18:07:43, 3.71it/s] {'loss': 3.0836, 'learning_rate': 6.87533668038527e-07, 'epoch': 5.56} + 35%|███▍ | 129100/371472 [10:15:49<18:07:43, 3.71it/s] 35%|███▍ | 129101/371472 [10:15:50<17:54:27, 3.76it/s] 35%|███▍ | 129102/371472 [10:15:50<18:01:04, 3.74it/s] 35%|███▍ | 129103/371472 [10:15:50<17:43:40, 3.80it/s] 35%|███▍ | 129104/371472 [10:15:51<17:29:57, 3.85it/s] 35%|███▍ | 129105/371472 [10:15:51<17:13:09, 3.91it/s] 35%|███▍ | 129106/371472 [10:15:51<18:39:40, 3.61it/s] 35%|███▍ | 129107/371472 [10:15:51<18:33:22, 3.63it/s] 35%|███▍ | 129108/371472 [10:15:52<19:22:26, 3.47it/s] 35%|███▍ | 129109/371472 [10:15:52<19:36:16, 3.43it/s] 35%|███▍ | 129110/371472 [10:15:52<18:53:38, 3.56it/s] 35%|███▍ | 129111/371472 [10:15:53<18:50:18, 3.57it/s] 35%|███▍ | 129112/371472 [10:15:53<18:41:19, 3.60it/s] 35%|███▍ | 129113/371472 [10:15:53<18:16:06, 3.69it/s] 35%|███▍ | 129114/371472 [10:15:53<18:43:33, 3.60it/s] 35%|███▍ | 129115/371472 [10:15:54<19:02:03, 3.54it/s] 35%|███▍ | 129116/371472 [10:15:54<18:37:36, 3.61it/s] 35%|███▍ | 129117/371472 [10:15:54<18:50:53, 3.57it/s] 35%|███▍ | 129118/371472 [10:15:55<20:01:57, 3.36it/s] 35%|███▍ | 129119/371472 [10:15:55<20:07:16, 3.35it/s] 35%|███▍ | 129120/371472 [10:15:55<21:02:06, 3.20it/s] {'loss': 3.0602, 'learning_rate': 6.874851860630482e-07, 'epoch': 5.56} + 35%|███▍ | 129120/371472 [10:15:55<21:02:06, 3.20it/s] 35%|███▍ | 129121/371472 [10:15:55<19:54:57, 3.38it/s] 35%|███▍ | 129122/371472 [10:15:56<19:40:15, 3.42it/s] 35%|███▍ | 129123/371472 [10:15:56<19:32:49, 3.44it/s] 35%|███▍ | 129124/371472 [10:15:56<19:27:00, 3.46it/s] 35%|███▍ | 129125/371472 [10:15:57<19:04:23, 3.53it/s] 35%|███▍ | 129126/371472 [10:15:57<19:23:45, 3.47it/s] 35%|███▍ | 129127/371472 [10:15:57<18:57:52, 3.55it/s] 35%|███▍ | 129128/371472 [10:15:57<19:21:24, 3.48it/s] 35%|███▍ | 129129/371472 [10:15:58<19:27:36, 3.46it/s] 35%|███▍ | 129130/371472 [10:15:58<20:47:50, 3.24it/s] 35%|███▍ | 129131/371472 [10:15:58<19:57:23, 3.37it/s] 35%|███▍ | 129132/371472 [10:15:59<19:46:23, 3.40it/s] 35%|███▍ | 129133/371472 [10:15:59<18:59:15, 3.55it/s] 35%|███▍ | 129134/371472 [10:15:59<20:01:15, 3.36it/s] 35%|███▍ | 129135/371472 [10:15:59<19:25:43, 3.46it/s] 35%|███▍ | 129136/371472 [10:16:00<19:46:16, 3.40it/s] 35%|███▍ | 129137/371472 [10:16:00<20:21:08, 3.31it/s] 35%|███▍ | 129138/371472 [10:16:00<19:42:11, 3.42it/s] 35%|███▍ | 129139/371472 [10:16:01<20:27:18, 3.29it/s] 35%|███▍ | 129140/371472 [10:16:01<19:40:11, 3.42it/s] {'loss': 3.1346, 'learning_rate': 6.874367040875693e-07, 'epoch': 5.56} + 35%|███▍ | 129140/371472 [10:16:01<19:40:11, 3.42it/s] 35%|███▍ | 129141/371472 [10:16:01<19:12:04, 3.51it/s] 35%|███▍ | 129142/371472 [10:16:01<18:22:06, 3.66it/s] 35%|███▍ | 129143/371472 [10:16:02<19:13:07, 3.50it/s] 35%|███▍ | 129144/371472 [10:16:02<18:28:30, 3.64it/s] 35%|███▍ | 129145/371472 [10:16:02<18:01:46, 3.73it/s] 35%|███▍ | 129146/371472 [10:16:03<18:20:05, 3.67it/s] 35%|███▍ | 129147/371472 [10:16:03<19:10:16, 3.51it/s] 35%|███▍ | 129148/371472 [10:16:03<18:55:06, 3.56it/s] 35%|███▍ | 129149/371472 [10:16:03<19:17:09, 3.49it/s] 35%|███▍ | 129150/371472 [10:16:04<20:06:06, 3.35it/s] 35%|███▍ | 129151/371472 [10:16:04<19:27:04, 3.46it/s] 35%|███▍ | 129152/371472 [10:16:04<20:22:50, 3.30it/s] 35%|███▍ | 129153/371472 [10:16:05<19:31:18, 3.45it/s] 35%|███▍ | 129154/371472 [10:16:05<18:33:52, 3.63it/s] 35%|███▍ | 129155/371472 [10:16:05<17:53:27, 3.76it/s] 35%|███▍ | 129156/371472 [10:16:05<17:44:40, 3.79it/s] 35%|███▍ | 129157/371472 [10:16:06<17:26:25, 3.86it/s] 35%|███▍ | 129158/371472 [10:16:06<17:54:33, 3.76it/s] 35%|███▍ | 129159/371472 [10:16:06<17:27:12, 3.86it/s] 35%|███▍ | 129160/371472 [10:16:06<17:33:52, 3.83it/s] {'loss': 3.3999, 'learning_rate': 6.873882221120902e-07, 'epoch': 5.56} + 35%|███▍ | 129160/371472 [10:16:06<17:33:52, 3.83it/s] 35%|███▍ | 129161/371472 [10:16:07<17:54:51, 3.76it/s] 35%|███▍ | 129162/371472 [10:16:07<18:11:39, 3.70it/s] 35%|███▍ | 129163/371472 [10:16:07<17:55:30, 3.75it/s] 35%|███▍ | 129164/371472 [10:16:08<18:34:31, 3.62it/s] 35%|███▍ | 129165/371472 [10:16:08<17:58:43, 3.74it/s] 35%|███▍ | 129166/371472 [10:16:08<17:26:27, 3.86it/s] 35%|███▍ | 129167/371472 [10:16:08<17:36:10, 3.82it/s] 35%|███▍ | 129168/371472 [10:16:09<17:15:37, 3.90it/s] 35%|███▍ | 129169/371472 [10:16:09<18:07:59, 3.71it/s] 35%|███▍ | 129170/371472 [10:16:09<17:52:22, 3.77it/s] 35%|███▍ | 129171/371472 [10:16:09<19:46:13, 3.40it/s] 35%|███▍ | 129172/371472 [10:16:10<18:40:10, 3.61it/s] 35%|███▍ | 129173/371472 [10:16:10<18:37:14, 3.61it/s] 35%|███▍ | 129174/371472 [10:16:10<17:54:47, 3.76it/s] 35%|███▍ | 129175/371472 [10:16:11<18:03:02, 3.73it/s] 35%|███▍ | 129176/371472 [10:16:11<17:40:42, 3.81it/s] 35%|███▍ | 129177/371472 [10:16:11<18:24:27, 3.66it/s] 35%|███▍ | 129178/371472 [10:16:11<18:12:35, 3.70it/s] 35%|███▍ | 129179/371472 [10:16:12<18:25:19, 3.65it/s] 35%|███▍ | 129180/371472 [10:16:12<19:16:52, 3.49it/s] {'loss': 3.2813, 'learning_rate': 6.873397401366114e-07, 'epoch': 5.56} + 35%|███▍ | 129180/371472 [10:16:12<19:16:52, 3.49it/s] 35%|███▍ | 129181/371472 [10:16:12<19:14:39, 3.50it/s] 35%|███▍ | 129182/371472 [10:16:12<18:50:44, 3.57it/s] 35%|███▍ | 129183/371472 [10:16:13<18:16:21, 3.68it/s] 35%|███▍ | 129184/371472 [10:16:13<17:44:48, 3.79it/s] 35%|███▍ | 129185/371472 [10:16:13<17:50:39, 3.77it/s] 35%|███▍ | 129186/371472 [10:16:14<18:08:44, 3.71it/s] 35%|███▍ | 129187/371472 [10:16:14<19:08:34, 3.52it/s] 35%|███▍ | 129188/371472 [10:16:14<19:20:46, 3.48it/s] 35%|███▍ | 129189/371472 [10:16:14<19:12:57, 3.50it/s] 35%|███▍ | 129190/371472 [10:16:15<21:56:04, 3.07it/s] 35%|███▍ | 129191/371472 [10:16:15<21:14:37, 3.17it/s] 35%|███▍ | 129192/371472 [10:16:15<20:20:58, 3.31it/s] 35%|███▍ | 129193/371472 [10:16:16<19:28:24, 3.46it/s] 35%|███▍ | 129194/371472 [10:16:16<19:41:18, 3.42it/s] 35%|███▍ | 129195/371472 [10:16:16<20:10:27, 3.34it/s] 35%|███▍ | 129196/371472 [10:16:17<19:05:21, 3.53it/s] 35%|███▍ | 129197/371472 [10:16:17<18:27:14, 3.65it/s] 35%|███▍ | 129198/371472 [10:16:17<19:12:36, 3.50it/s] 35%|███▍ | 129199/371472 [10:16:17<19:34:51, 3.44it/s] 35%|███▍ | 129200/371472 [10:16:18<19:35:47, 3.43it/s] {'loss': 3.1085, 'learning_rate': 6.872912581611326e-07, 'epoch': 5.56} + 35%|███▍ | 129200/371472 [10:16:18<19:35:47, 3.43it/s] 35%|███▍ | 129201/371472 [10:16:18<19:23:15, 3.47it/s] 35%|███▍ | 129202/371472 [10:16:18<18:42:40, 3.60it/s] 35%|███▍ | 129203/371472 [10:16:19<19:28:32, 3.46it/s] 35%|███▍ | 129204/371472 [10:16:19<19:09:01, 3.51it/s] 35%|███▍ | 129205/371472 [10:16:19<19:33:10, 3.44it/s] 35%|███▍ | 129206/371472 [10:16:19<19:02:00, 3.54it/s] 35%|███▍ | 129207/371472 [10:16:20<19:11:43, 3.51it/s] 35%|███▍ | 129208/371472 [10:16:20<18:34:14, 3.62it/s] 35%|███▍ | 129209/371472 [10:16:20<18:10:00, 3.70it/s] 35%|███▍ | 129210/371472 [10:16:20<18:56:50, 3.55it/s] 35%|███▍ | 129211/371472 [10:16:21<18:25:01, 3.65it/s] 35%|███▍ | 129212/371472 [10:16:21<18:24:23, 3.66it/s] 35%|███▍ | 129213/371472 [10:16:21<19:03:36, 3.53it/s] 35%|███▍ | 129214/371472 [10:16:22<19:24:52, 3.47it/s] 35%|███▍ | 129215/371472 [10:16:22<18:38:10, 3.61it/s] 35%|███▍ | 129216/371472 [10:16:22<18:04:47, 3.72it/s] 35%|███▍ | 129217/371472 [10:16:22<18:53:41, 3.56it/s] 35%|███▍ | 129218/371472 [10:16:23<18:36:02, 3.62it/s] 35%|███▍ | 129219/371472 [10:16:23<18:22:15, 3.66it/s] 35%|███▍ | 129220/371472 [10:16:23<18:21:44, 3.66it/s] {'loss': 3.439, 'learning_rate': 6.872427761856535e-07, 'epoch': 5.57} + 35%|��██▍ | 129220/371472 [10:16:23<18:21:44, 3.66it/s] 35%|███▍ | 129221/371472 [10:16:23<18:14:35, 3.69it/s] 35%|███▍ | 129222/371472 [10:16:24<18:17:56, 3.68it/s] 35%|███▍ | 129223/371472 [10:16:24<18:31:25, 3.63it/s] 35%|███▍ | 129224/371472 [10:16:24<19:02:47, 3.53it/s] 35%|███▍ | 129225/371472 [10:16:25<18:44:31, 3.59it/s] 35%|███▍ | 129226/371472 [10:16:25<18:24:36, 3.66it/s] 35%|███▍ | 129227/371472 [10:16:25<18:48:55, 3.58it/s] 35%|███▍ | 129228/371472 [10:16:26<20:13:55, 3.33it/s] 35%|███▍ | 129229/371472 [10:16:26<21:06:46, 3.19it/s] 35%|███▍ | 129230/371472 [10:16:26<20:46:01, 3.24it/s] 35%|███▍ | 129231/371472 [10:16:26<20:11:24, 3.33it/s] 35%|███▍ | 129232/371472 [10:16:27<19:16:10, 3.49it/s] 35%|███▍ | 129233/371472 [10:16:27<19:27:14, 3.46it/s] 35%|███▍ | 129234/371472 [10:16:27<19:43:17, 3.41it/s] 35%|███▍ | 129235/371472 [10:16:28<19:21:03, 3.48it/s] 35%|███▍ | 129236/371472 [10:16:28<19:18:05, 3.49it/s] 35%|███▍ | 129237/371472 [10:16:28<19:27:41, 3.46it/s] 35%|███▍ | 129238/371472 [10:16:28<20:30:52, 3.28it/s] 35%|███▍ | 129239/371472 [10:16:29<19:34:26, 3.44it/s] 35%|███▍ | 129240/371472 [10:16:29<20:39:14, 3.26it/s] {'loss': 3.1544, 'learning_rate': 6.871942942101747e-07, 'epoch': 5.57} + 35%|███▍ | 129240/371472 [10:16:29<20:39:14, 3.26it/s] 35%|███▍ | 129241/371472 [10:16:29<20:22:25, 3.30it/s] 35%|███▍ | 129242/371472 [10:16:30<19:33:59, 3.44it/s] 35%|███▍ | 129243/371472 [10:16:30<19:09:47, 3.51it/s] 35%|███▍ | 129244/371472 [10:16:30<18:26:26, 3.65it/s] 35%|███▍ | 129245/371472 [10:16:30<17:41:13, 3.80it/s] 35%|███▍ | 129246/371472 [10:16:31<18:52:42, 3.56it/s] 35%|███▍ | 129247/371472 [10:16:31<18:34:32, 3.62it/s] 35%|███▍ | 129248/371472 [10:16:31<19:27:04, 3.46it/s] 35%|███▍ | 129249/371472 [10:16:32<19:03:10, 3.53it/s] 35%|███▍ | 129250/371472 [10:16:32<18:22:26, 3.66it/s] 35%|███▍ | 129251/371472 [10:16:32<17:32:16, 3.84it/s] 35%|███▍ | 129252/371472 [10:16:32<17:54:14, 3.76it/s] 35%|███▍ | 129253/371472 [10:16:33<19:31:15, 3.45it/s] 35%|███▍ | 129254/371472 [10:16:33<19:10:24, 3.51it/s] 35%|███▍ | 129255/371472 [10:16:33<18:46:16, 3.58it/s] 35%|███▍ | 129256/371472 [10:16:34<18:46:34, 3.58it/s] 35%|███▍ | 129257/371472 [10:16:34<19:10:49, 3.51it/s] 35%|███▍ | 129258/371472 [10:16:34<18:23:59, 3.66it/s] 35%|███▍ | 129259/371472 [10:16:34<18:00:32, 3.74it/s] 35%|███▍ | 129260/371472 [10:16:35<18:10:42, 3.70it/s] {'loss': 3.1114, 'learning_rate': 6.871458122346959e-07, 'epoch': 5.57} + 35%|███▍ | 129260/371472 [10:16:35<18:10:42, 3.70it/s] 35%|███▍ | 129261/371472 [10:16:35<18:19:53, 3.67it/s] 35%|███▍ | 129262/371472 [10:16:35<18:12:17, 3.70it/s] 35%|███▍ | 129263/371472 [10:16:35<18:04:18, 3.72it/s] 35%|███▍ | 129264/371472 [10:16:36<18:56:55, 3.55it/s] 35%|███▍ | 129265/371472 [10:16:36<18:34:18, 3.62it/s] 35%|███▍ | 129266/371472 [10:16:36<18:55:56, 3.55it/s] 35%|███▍ | 129267/371472 [10:16:37<18:37:01, 3.61it/s] 35%|███▍ | 129268/371472 [10:16:37<18:07:01, 3.71it/s] 35%|███▍ | 129269/371472 [10:16:37<18:42:49, 3.60it/s] 35%|███▍ | 129270/371472 [10:16:37<18:04:29, 3.72it/s] 35%|███▍ | 129271/371472 [10:16:38<18:07:14, 3.71it/s] 35%|███▍ | 129272/371472 [10:16:38<18:01:03, 3.73it/s] 35%|███▍ | 129273/371472 [10:16:38<18:58:19, 3.55it/s] 35%|███▍ | 129274/371472 [10:16:38<18:58:03, 3.55it/s] 35%|███▍ | 129275/371472 [10:16:39<18:27:48, 3.64it/s] 35%|███▍ | 129276/371472 [10:16:39<18:28:22, 3.64it/s] 35%|███▍ | 129277/371472 [10:16:39<19:42:08, 3.41it/s] 35%|███▍ | 129278/371472 [10:16:40<21:03:05, 3.20it/s] 35%|███▍ | 129279/371472 [10:16:40<20:03:38, 3.35it/s] 35%|███▍ | 129280/371472 [10:16:40<20:07:57, 3.34it/s] {'loss': 3.121, 'learning_rate': 6.870973302592169e-07, 'epoch': 5.57} + 35%|███▍ | 129280/371472 [10:16:40<20:07:57, 3.34it/s] 35%|███▍ | 129281/371472 [10:16:41<22:09:51, 3.04it/s] 35%|███▍ | 129282/371472 [10:16:41<21:03:56, 3.19it/s] 35%|███▍ | 129283/371472 [10:16:41<20:54:39, 3.22it/s] 35%|███▍ | 129284/371472 [10:16:41<19:51:22, 3.39it/s] 35%|███▍ | 129285/371472 [10:16:42<19:15:34, 3.49it/s] 35%|███▍ | 129286/371472 [10:16:42<19:50:38, 3.39it/s] 35%|███▍ | 129287/371472 [10:16:42<19:34:19, 3.44it/s] 35%|███▍ | 129288/371472 [10:16:43<19:43:52, 3.41it/s] 35%|███▍ | 129289/371472 [10:16:43<19:41:25, 3.42it/s] 35%|███▍ | 129290/371472 [10:16:43<18:57:53, 3.55it/s] 35%|███▍ | 129291/371472 [10:16:43<18:31:13, 3.63it/s] 35%|███▍ | 129292/371472 [10:16:44<18:18:24, 3.67it/s] 35%|███▍ | 129293/371472 [10:16:44<18:43:27, 3.59it/s] 35%|███▍ | 129294/371472 [10:16:44<18:53:54, 3.56it/s] 35%|███▍ | 129295/371472 [10:16:45<18:49:54, 3.57it/s] 35%|███▍ | 129296/371472 [10:16:45<19:01:11, 3.54it/s] 35%|███▍ | 129297/371472 [10:16:45<19:02:36, 3.53it/s] 35%|███▍ | 129298/371472 [10:16:45<19:03:29, 3.53it/s] 35%|███▍ | 129299/371472 [10:16:46<18:43:02, 3.59it/s] 35%|███▍ | 129300/371472 [10:16:46<18:43:29, 3.59it/s] {'loss': 3.23, 'learning_rate': 6.87048848283738e-07, 'epoch': 5.57} + 35%|███▍ | 129300/371472 [10:16:46<18:43:29, 3.59it/s] 35%|███▍ | 129301/371472 [10:16:46<18:15:43, 3.68it/s] 35%|███▍ | 129302/371472 [10:16:47<19:05:16, 3.52it/s] 35%|███▍ | 129303/371472 [10:16:47<18:54:51, 3.56it/s] 35%|███▍ | 129304/371472 [10:16:47<18:42:00, 3.60it/s] 35%|███▍ | 129305/371472 [10:16:47<18:58:51, 3.54it/s] 35%|███▍ | 129306/371472 [10:16:48<18:50:09, 3.57it/s] 35%|███▍ | 129307/371472 [10:16:48<19:36:39, 3.43it/s] 35%|███▍ | 129308/371472 [10:16:48<20:00:47, 3.36it/s] 35%|███▍ | 129309/371472 [10:16:49<19:35:20, 3.43it/s] 35%|███▍ | 129310/371472 [10:16:49<19:43:55, 3.41it/s] 35%|███▍ | 129311/371472 [10:16:49<19:51:36, 3.39it/s] 35%|███▍ | 129312/371472 [10:16:49<18:55:08, 3.56it/s] 35%|███▍ | 129313/371472 [10:16:50<20:54:15, 3.22it/s] 35%|███▍ | 129314/371472 [10:16:50<20:14:07, 3.32it/s] 35%|███▍ | 129315/371472 [10:16:50<20:06:58, 3.34it/s] 35%|███▍ | 129316/371472 [10:16:51<19:06:33, 3.52it/s] 35%|███▍ | 129317/371472 [10:16:51<20:23:20, 3.30it/s] 35%|███▍ | 129318/371472 [10:16:51<20:37:10, 3.26it/s] 35%|███▍ | 129319/371472 [10:16:52<20:01:53, 3.36it/s] 35%|███▍ | 129320/371472 [10:16:52<20:00:05, 3.36it/s] {'loss': 3.2205, 'learning_rate': 6.870003663082591e-07, 'epoch': 5.57} + 35%|███▍ | 129320/371472 [10:16:52<20:00:05, 3.36it/s] 35%|███▍ | 129321/371472 [10:16:52<19:19:36, 3.48it/s] 35%|███▍ | 129322/371472 [10:16:52<19:23:33, 3.47it/s] 35%|███▍ | 129323/371472 [10:16:53<18:43:03, 3.59it/s] 35%|███▍ | 129324/371472 [10:16:53<18:46:07, 3.58it/s] 35%|███▍ | 129325/371472 [10:16:53<18:04:30, 3.72it/s] 35%|███▍ | 129326/371472 [10:16:53<18:21:09, 3.67it/s] 35%|███▍ | 129327/371472 [10:16:54<17:53:47, 3.76it/s] 35%|███▍ | 129328/371472 [10:16:54<20:23:09, 3.30it/s] 35%|███▍ | 129329/371472 [10:16:54<19:47:31, 3.40it/s] 35%|███▍ | 129330/371472 [10:16:55<20:31:29, 3.28it/s] 35%|███▍ | 129331/371472 [10:16:55<21:05:21, 3.19it/s] 35%|███▍ | 129332/371472 [10:16:55<20:42:37, 3.25it/s] 35%|███▍ | 129333/371472 [10:16:56<20:44:52, 3.24it/s] 35%|███▍ | 129334/371472 [10:16:56<19:59:48, 3.36it/s] 35%|███▍ | 129335/371472 [10:16:56<19:46:38, 3.40it/s] 35%|███▍ | 129336/371472 [10:16:56<19:04:22, 3.53it/s] 35%|███▍ | 129337/371472 [10:16:57<18:06:50, 3.71it/s] 35%|███▍ | 129338/371472 [10:16:57<17:59:05, 3.74it/s] 35%|███▍ | 129339/371472 [10:16:57<19:35:52, 3.43it/s] 35%|███▍ | 129340/371472 [10:16:58<20:03:36, 3.35it/s] {'loss': 3.2211, 'learning_rate': 6.869518843327803e-07, 'epoch': 5.57} + 35%|███▍ | 129340/371472 [10:16:58<20:03:36, 3.35it/s] 35%|███▍ | 129341/371472 [10:16:58<19:55:15, 3.38it/s] 35%|███▍ | 129342/371472 [10:16:58<20:07:53, 3.34it/s] 35%|███▍ | 129343/371472 [10:16:59<21:04:49, 3.19it/s] 35%|███▍ | 129344/371472 [10:16:59<19:53:08, 3.38it/s] 35%|███▍ | 129345/371472 [10:16:59<19:23:51, 3.47it/s] 35%|███▍ | 129346/371472 [10:16:59<19:14:17, 3.50it/s] 35%|███▍ | 129347/371472 [10:17:00<18:22:50, 3.66it/s] 35%|███▍ | 129348/371472 [10:17:00<18:04:08, 3.72it/s] 35%|███▍ | 129349/371472 [10:17:00<17:42:27, 3.80it/s] 35%|███▍ | 129350/371472 [10:17:00<17:42:58, 3.80it/s] 35%|███▍ | 129351/371472 [10:17:01<17:38:15, 3.81it/s] 35%|███▍ | 129352/371472 [10:17:01<17:25:53, 3.86it/s] 35%|███▍ | 129353/371472 [10:17:01<17:47:52, 3.78it/s] 35%|███▍ | 129354/371472 [10:17:01<18:28:50, 3.64it/s] 35%|███▍ | 129355/371472 [10:17:02<18:31:24, 3.63it/s] 35%|███▍ | 129356/371472 [10:17:02<18:22:43, 3.66it/s] 35%|███▍ | 129357/371472 [10:17:02<17:37:59, 3.81it/s] 35%|███▍ | 129358/371472 [10:17:03<17:56:49, 3.75it/s] 35%|███▍ | 129359/371472 [10:17:03<18:06:46, 3.71it/s] 35%|███▍ | 129360/371472 [10:17:03<18:06:31, 3.71it/s] {'loss': 3.1302, 'learning_rate': 6.869034023573014e-07, 'epoch': 5.57} + 35%|███▍ | 129360/371472 [10:17:03<18:06:31, 3.71it/s] 35%|███▍ | 129361/371472 [10:17:03<18:27:55, 3.64it/s] 35%|███▍ | 129362/371472 [10:17:04<18:48:13, 3.58it/s] 35%|███▍ | 129363/371472 [10:17:04<18:47:52, 3.58it/s] 35%|███▍ | 129364/371472 [10:17:04<18:44:02, 3.59it/s] 35%|███▍ | 129365/371472 [10:17:04<18:45:39, 3.58it/s] 35%|███▍ | 129366/371472 [10:17:05<18:54:21, 3.56it/s] 35%|███▍ | 129367/371472 [10:17:05<18:42:34, 3.59it/s] 35%|███▍ | 129368/371472 [10:17:05<20:09:17, 3.34it/s] 35%|███▍ | 129369/371472 [10:17:06<21:01:47, 3.20it/s] 35%|███▍ | 129370/371472 [10:17:06<21:59:15, 3.06it/s] 35%|███▍ | 129371/371472 [10:17:06<21:51:23, 3.08it/s] 35%|███▍ | 129372/371472 [10:17:07<21:09:38, 3.18it/s] 35%|███▍ | 129373/371472 [10:17:07<20:51:53, 3.22it/s] 35%|███▍ | 129374/371472 [10:17:07<20:17:54, 3.31it/s] 35%|███▍ | 129375/371472 [10:17:08<19:34:26, 3.44it/s] 35%|███▍ | 129376/371472 [10:17:08<19:49:20, 3.39it/s] 35%|███▍ | 129377/371472 [10:17:08<20:49:36, 3.23it/s] 35%|███▍ | 129378/371472 [10:17:09<20:38:45, 3.26it/s] 35%|███▍ | 129379/371472 [10:17:09<20:14:30, 3.32it/s] 35%|███▍ | 129380/371472 [10:17:09<20:39:11, 3.26it/s] {'loss': 3.0789, 'learning_rate': 6.868549203818224e-07, 'epoch': 5.57} + 35%|███▍ | 129380/371472 [10:17:09<20:39:11, 3.26it/s] 35%|███▍ | 129381/371472 [10:17:09<20:00:50, 3.36it/s] 35%|███▍ | 129382/371472 [10:17:10<18:54:25, 3.56it/s] 35%|███▍ | 129383/371472 [10:17:10<19:59:44, 3.36it/s] 35%|███▍ | 129384/371472 [10:17:10<19:52:40, 3.38it/s] 35%|███▍ | 129385/371472 [10:17:11<19:20:40, 3.48it/s] 35%|███▍ | 129386/371472 [10:17:11<18:38:20, 3.61it/s] 35%|███▍ | 129387/371472 [10:17:11<19:08:01, 3.51it/s] 35%|███▍ | 129388/371472 [10:17:11<20:25:44, 3.29it/s] 35%|███▍ | 129389/371472 [10:17:12<21:31:50, 3.12it/s] 35%|███▍ | 129390/371472 [10:17:12<19:57:00, 3.37it/s] 35%|███▍ | 129391/371472 [10:17:12<20:08:56, 3.34it/s] 35%|███▍ | 129392/371472 [10:17:13<19:40:31, 3.42it/s] 35%|███▍ | 129393/371472 [10:17:13<18:48:17, 3.58it/s] 35%|███▍ | 129394/371472 [10:17:13<18:18:37, 3.67it/s] 35%|███▍ | 129395/371472 [10:17:13<18:29:42, 3.64it/s] 35%|███▍ | 129396/371472 [10:17:14<18:51:14, 3.57it/s] 35%|███▍ | 129397/371472 [10:17:14<18:25:45, 3.65it/s] 35%|███▍ | 129398/371472 [10:17:14<17:48:08, 3.78it/s] 35%|███▍ | 129399/371472 [10:17:15<20:00:37, 3.36it/s] 35%|███▍ | 129400/371472 [10:17:15<19:24:20, 3.47it/s] {'loss': 3.2549, 'learning_rate': 6.868064384063435e-07, 'epoch': 5.57} + 35%|███▍ | 129400/371472 [10:17:15<19:24:20, 3.47it/s] 35%|███▍ | 129401/371472 [10:17:15<20:11:11, 3.33it/s] 35%|███▍ | 129402/371472 [10:17:15<19:39:40, 3.42it/s] 35%|███▍ | 129403/371472 [10:17:16<18:47:48, 3.58it/s] 35%|███▍ | 129404/371472 [10:17:16<18:40:19, 3.60it/s] 35%|███▍ | 129405/371472 [10:17:16<18:12:09, 3.69it/s] 35%|███▍ | 129406/371472 [10:17:16<17:55:04, 3.75it/s] 35%|███▍ | 129407/371472 [10:17:17<18:05:02, 3.72it/s] 35%|███▍ | 129408/371472 [10:17:17<18:41:08, 3.60it/s] 35%|███▍ | 129409/371472 [10:17:17<18:43:51, 3.59it/s] 35%|███▍ | 129410/371472 [10:17:18<18:20:45, 3.67it/s] 35%|███▍ | 129411/371472 [10:17:18<18:25:44, 3.65it/s] 35%|███▍ | 129412/371472 [10:17:18<18:20:10, 3.67it/s] 35%|��██▍ | 129413/371472 [10:17:18<18:22:26, 3.66it/s] 35%|███▍ | 129414/371472 [10:17:19<17:58:33, 3.74it/s] 35%|███▍ | 129415/371472 [10:17:19<18:16:27, 3.68it/s] 35%|███▍ | 129416/371472 [10:17:19<18:31:06, 3.63it/s] 35%|███▍ | 129417/371472 [10:17:20<18:28:09, 3.64it/s] 35%|███▍ | 129418/371472 [10:17:20<19:11:23, 3.50it/s] 35%|███▍ | 129419/371472 [10:17:20<18:21:40, 3.66it/s] 35%|███▍ | 129420/371472 [10:17:20<17:47:55, 3.78it/s] {'loss': 3.1204, 'learning_rate': 6.867579564308647e-07, 'epoch': 5.57} + 35%|███▍ | 129420/371472 [10:17:20<17:47:55, 3.78it/s] 35%|███▍ | 129421/371472 [10:17:21<18:07:17, 3.71it/s] 35%|███▍ | 129422/371472 [10:17:21<18:01:47, 3.73it/s] 35%|███▍ | 129423/371472 [10:17:21<17:32:07, 3.83it/s] 35%|███▍ | 129424/371472 [10:17:21<17:30:45, 3.84it/s] 35%|███▍ | 129425/371472 [10:17:22<18:08:38, 3.71it/s] 35%|███▍ | 129426/371472 [10:17:22<18:32:58, 3.62it/s] 35%|███▍ | 129427/371472 [10:17:22<19:36:06, 3.43it/s] 35%|███▍ | 129428/371472 [10:17:23<19:00:22, 3.54it/s] 35%|███▍ | 129429/371472 [10:17:23<19:58:04, 3.37it/s] 35%|███▍ | 129430/371472 [10:17:23<19:57:48, 3.37it/s] 35%|███▍ | 129431/371472 [10:17:23<18:46:33, 3.58it/s] 35%|███▍ | 129432/371472 [10:17:24<19:22:20, 3.47it/s] 35%|███▍ | 129433/371472 [10:17:24<20:03:27, 3.35it/s] 35%|███▍ | 129434/371472 [10:17:24<20:08:44, 3.34it/s] 35%|███▍ | 129435/371472 [10:17:25<18:55:33, 3.55it/s] 35%|███▍ | 129436/371472 [10:17:25<17:58:23, 3.74it/s] 35%|███▍ | 129437/371472 [10:17:25<17:36:53, 3.82it/s] 35%|███▍ | 129438/371472 [10:17:25<18:21:31, 3.66it/s] 35%|███▍ | 129439/371472 [10:17:26<18:04:47, 3.72it/s] 35%|███▍ | 129440/371472 [10:17:26<17:47:13, 3.78it/s] {'loss': 3.0862, 'learning_rate': 6.867094744553858e-07, 'epoch': 5.58} + 35%|███▍ | 129440/371472 [10:17:26<17:47:13, 3.78it/s] 35%|███▍ | 129441/371472 [10:17:26<17:22:12, 3.87it/s] 35%|███▍ | 129442/371472 [10:17:26<17:12:29, 3.91it/s] 35%|███▍ | 129443/371472 [10:17:27<18:26:47, 3.64it/s] 35%|███▍ | 129444/371472 [10:17:27<19:09:56, 3.51it/s] 35%|███▍ | 129445/371472 [10:17:27<18:25:12, 3.65it/s] 35%|███▍ | 129446/371472 [10:17:27<17:59:44, 3.74it/s] 35%|███▍ | 129447/371472 [10:17:28<18:10:15, 3.70it/s] 35%|███▍ | 129448/371472 [10:17:28<19:12:58, 3.50it/s] 35%|███▍ | 129449/371472 [10:17:28<19:30:05, 3.45it/s] 35%|███▍ | 129450/371472 [10:17:29<20:06:10, 3.34it/s] 35%|███▍ | 129451/371472 [10:17:29<19:25:32, 3.46it/s] 35%|███▍ | 129452/371472 [10:17:29<19:22:16, 3.47it/s] 35%|███▍ | 129453/371472 [10:17:30<18:54:07, 3.56it/s] 35%|███▍ | 129454/371472 [10:17:30<18:58:58, 3.54it/s] 35%|███▍ | 129455/371472 [10:17:30<18:56:33, 3.55it/s] 35%|███▍ | 129456/371472 [10:17:30<19:38:04, 3.42it/s] 35%|███▍ | 129457/371472 [10:17:31<19:24:21, 3.46it/s] 35%|███▍ | 129458/371472 [10:17:31<18:26:35, 3.65it/s] 35%|███▍ | 129459/371472 [10:17:31<19:02:24, 3.53it/s] 35%|███▍ | 129460/371472 [10:17:32<19:21:45, 3.47it/s] {'loss': 3.3525, 'learning_rate': 6.866609924799068e-07, 'epoch': 5.58} + 35%|███▍ | 129460/371472 [10:17:32<19:21:45, 3.47it/s] 35%|███▍ | 129461/371472 [10:17:32<19:35:09, 3.43it/s] 35%|███▍ | 129462/371472 [10:17:32<19:22:37, 3.47it/s] 35%|███▍ | 129463/371472 [10:17:32<18:44:18, 3.59it/s] 35%|███▍ | 129464/371472 [10:17:33<18:17:57, 3.67it/s] 35%|███▍ | 129465/371472 [10:17:33<18:12:57, 3.69it/s] 35%|███▍ | 129466/371472 [10:17:33<19:57:11, 3.37it/s] 35%|███▍ | 129467/371472 [10:17:34<20:10:01, 3.33it/s] 35%|███▍ | 129468/371472 [10:17:34<19:32:58, 3.44it/s] 35%|███▍ | 129469/371472 [10:17:34<19:00:48, 3.54it/s] 35%|███▍ | 129470/371472 [10:17:34<19:05:38, 3.52it/s] 35%|███▍ | 129471/371472 [10:17:35<20:26:22, 3.29it/s] 35%|███▍ | 129472/371472 [10:17:35<19:35:16, 3.43it/s] 35%|███▍ | 129473/371472 [10:17:35<18:57:47, 3.54it/s] 35%|███▍ | 129474/371472 [10:17:35<18:17:14, 3.68it/s] 35%|███▍ | 129475/371472 [10:17:36<19:15:32, 3.49it/s] 35%|███▍ | 129476/371472 [10:17:36<21:19:12, 3.15it/s] 35%|███▍ | 129477/371472 [10:17:36<20:24:42, 3.29it/s] 35%|███▍ | 129478/371472 [10:17:37<19:32:39, 3.44it/s] 35%|███▍ | 129479/371472 [10:17:37<18:58:42, 3.54it/s] 35%|███▍ | 129480/371472 [10:17:37<18:52:21, 3.56it/s] {'loss': 3.2225, 'learning_rate': 6.86612510504428e-07, 'epoch': 5.58} + 35%|███▍ | 129480/371472 [10:17:37<18:52:21, 3.56it/s] 35%|███▍ | 129481/371472 [10:17:38<19:16:10, 3.49it/s] 35%|███▍ | 129482/371472 [10:17:38<19:29:17, 3.45it/s] 35%|███▍ | 129483/371472 [10:17:38<19:30:20, 3.45it/s] 35%|███▍ | 129484/371472 [10:17:38<18:51:03, 3.57it/s] 35%|███▍ | 129485/371472 [10:17:39<19:27:28, 3.45it/s] 35%|███▍ | 129486/371472 [10:17:39<19:41:11, 3.41it/s] 35%|███▍ | 129487/371472 [10:17:39<20:31:15, 3.28it/s] 35%|███▍ | 129488/371472 [10:17:40<21:05:02, 3.19it/s] 35%|███▍ | 129489/371472 [10:17:40<20:06:47, 3.34it/s] 35%|███▍ | 129490/371472 [10:17:40<19:33:03, 3.44it/s] 35%|███▍ | 129491/371472 [10:17:41<21:47:07, 3.09it/s] 35%|███▍ | 129492/371472 [10:17:41<20:45:32, 3.24it/s] 35%|███▍ | 129493/371472 [10:17:41<19:50:12, 3.39it/s] 35%|███▍ | 129494/371472 [10:17:41<19:33:54, 3.44it/s] 35%|███▍ | 129495/371472 [10:17:42<19:05:58, 3.52it/s] 35%|███▍ | 129496/371472 [10:17:42<18:42:57, 3.59it/s] 35%|███▍ | 129497/371472 [10:17:42<18:09:50, 3.70it/s] 35%|███▍ | 129498/371472 [10:17:43<17:57:07, 3.74it/s] 35%|███▍ | 129499/371472 [10:17:43<19:01:08, 3.53it/s] 35%|███▍ | 129500/371472 [10:17:43<18:32:50, 3.62it/s] {'loss': 3.1107, 'learning_rate': 6.865640285289493e-07, 'epoch': 5.58} + 35%|███▍ | 129500/371472 [10:17:43<18:32:50, 3.62it/s] 35%|███▍ | 129501/371472 [10:17:43<18:50:53, 3.57it/s] 35%|███▍ | 129502/371472 [10:17:44<19:02:21, 3.53it/s] 35%|███▍ | 129503/371472 [10:17:44<18:49:42, 3.57it/s] 35%|███▍ | 129504/371472 [10:17:44<18:55:49, 3.55it/s] 35%|███▍ | 129505/371472 [10:17:44<18:46:25, 3.58it/s] 35%|███▍ | 129506/371472 [10:17:45<19:05:19, 3.52it/s] 35%|███▍ | 129507/371472 [10:17:45<18:45:22, 3.58it/s] 35%|███▍ | 129508/371472 [10:17:45<19:00:14, 3.54it/s] 35%|███▍ | 129509/371472 [10:17:46<18:44:44, 3.59it/s] 35%|███▍ | 129510/371472 [10:17:46<17:59:03, 3.74it/s] 35%|███▍ | 129511/371472 [10:17:46<18:21:32, 3.66it/s] 35%|███▍ | 129512/371472 [10:17:46<18:04:16, 3.72it/s] 35%|███▍ | 129513/371472 [10:17:47<18:31:22, 3.63it/s] 35%|███▍ | 129514/371472 [10:17:47<18:14:20, 3.68it/s] 35%|███▍ | 129515/371472 [10:17:47<18:51:44, 3.56it/s] 35%|███▍ | 129516/371472 [10:17:48<18:50:14, 3.57it/s] 35%|███▍ | 129517/371472 [10:17:48<18:44:32, 3.59it/s] 35%|███▍ | 129518/371472 [10:17:48<19:31:21, 3.44it/s] 35%|███▍ | 129519/371472 [10:17:48<20:40:10, 3.25it/s] 35%|███▍ | 129520/371472 [10:17:49<21:29:13, 3.13it/s] {'loss': 3.2329, 'learning_rate': 6.865155465534703e-07, 'epoch': 5.58} + 35%|███▍ | 129520/371472 [10:17:49<21:29:13, 3.13it/s] 35%|███▍ | 129521/371472 [10:17:49<21:24:26, 3.14it/s] 35%|███▍ | 129522/371472 [10:17:49<21:43:34, 3.09it/s] 35%|███▍ | 129523/371472 [10:17:50<20:54:51, 3.21it/s] 35%|███▍ | 129524/371472 [10:17:50<20:42:54, 3.24it/s] 35%|███▍ | 129525/371472 [10:17:50<22:39:09, 2.97it/s] 35%|███▍ | 129526/371472 [10:17:51<22:02:43, 3.05it/s] 35%|███▍ | 129527/371472 [10:17:51<20:46:11, 3.24it/s] 35%|███▍ | 129528/371472 [10:17:51<21:02:17, 3.19it/s] 35%|███▍ | 129529/371472 [10:17:52<20:23:26, 3.30it/s] 35%|███▍ | 129530/371472 [10:17:52<19:36:03, 3.43it/s] 35%|███▍ | 129531/371472 [10:17:52<18:58:05, 3.54it/s] 35%|███▍ | 129532/371472 [10:17:53<21:14:23, 3.16it/s] 35%|███▍ | 129533/371472 [10:17:53<19:43:58, 3.41it/s] 35%|███▍ | 129534/371472 [10:17:53<18:37:33, 3.61it/s] 35%|███▍ | 129535/371472 [10:17:53<18:59:49, 3.54it/s] 35%|███▍ | 129536/371472 [10:17:54<18:48:10, 3.57it/s] 35%|███▍ | 129537/371472 [10:17:54<18:04:06, 3.72it/s] 35%|███▍ | 129538/371472 [10:17:54<19:30:35, 3.44it/s] 35%|███▍ | 129539/371472 [10:17:54<19:01:47, 3.53it/s] 35%|███▍ | 129540/371472 [10:17:55<19:04:09, 3.52it/s] {'loss': 3.2811, 'learning_rate': 6.864670645779912e-07, 'epoch': 5.58} + 35%|███▍ | 129540/371472 [10:17:55<19:04:09, 3.52it/s] 35%|███▍ | 129541/371472 [10:17:55<19:21:28, 3.47it/s] 35%|███▍ | 129542/371472 [10:17:55<19:41:17, 3.41it/s] 35%|███▍ | 129543/371472 [10:17:56<19:05:37, 3.52it/s] 35%|███▍ | 129544/371472 [10:17:56<19:41:38, 3.41it/s] 35%|███▍ | 129545/371472 [10:17:56<18:54:33, 3.55it/s] 35%|███▍ | 129546/371472 [10:17:56<19:19:49, 3.48it/s] 35%|███▍ | 129547/371472 [10:17:57<19:54:53, 3.37it/s] 35%|███▍ | 129548/371472 [10:17:57<20:05:12, 3.35it/s] 35%|███▍ | 129549/371472 [10:17:57<19:48:09, 3.39it/s] 35%|███▍ | 129550/371472 [10:17:58<18:42:36, 3.59it/s] 35%|███▍ | 129551/371472 [10:17:58<18:18:39, 3.67it/s] 35%|███▍ | 129552/371472 [10:17:58<18:32:23, 3.62it/s] 35%|███▍ | 129553/371472 [10:17:58<19:23:27, 3.47it/s] 35%|███▍ | 129554/371472 [10:17:59<19:01:11, 3.53it/s] 35%|███▍ | 129555/371472 [10:17:59<18:48:36, 3.57it/s] 35%|███▍ | 129556/371472 [10:17:59<19:41:42, 3.41it/s] 35%|███▍ | 129557/371472 [10:18:00<19:24:17, 3.46it/s] 35%|███▍ | 129558/371472 [10:18:00<18:31:24, 3.63it/s] 35%|███▍ | 129559/371472 [10:18:00<18:18:47, 3.67it/s] 35%|███▍ | 129560/371472 [10:18:00<18:12:27, 3.69it/s] {'loss': 3.1129, 'learning_rate': 6.864185826025124e-07, 'epoch': 5.58} + 35%|███▍ | 129560/371472 [10:18:00<18:12:27, 3.69it/s] 35%|███▍ | 129561/371472 [10:18:01<17:56:36, 3.74it/s] 35%|███▍ | 129562/371472 [10:18:01<17:25:13, 3.86it/s] 35%|███▍ | 129563/371472 [10:18:01<19:36:37, 3.43it/s] 35%|███▍ | 129564/371472 [10:18:02<19:11:32, 3.50it/s] 35%|███▍ | 129565/371472 [10:18:02<19:28:44, 3.45it/s] 35%|███▍ | 129566/371472 [10:18:02<19:42:13, 3.41it/s] 35%|███▍ | 129567/371472 [10:18:02<18:53:09, 3.56it/s] 35%|███▍ | 129568/371472 [10:18:03<19:49:41, 3.39it/s] 35%|███▍ | 129569/371472 [10:18:03<20:16:57, 3.31it/s] 35%|███▍ | 129570/371472 [10:18:03<18:47:20, 3.58it/s] 35%|███▍ | 129571/371472 [10:18:04<18:54:53, 3.55it/s] 35%|███▍ | 129572/371472 [10:18:04<18:36:56, 3.61it/s] 35%|███▍ | 129573/371472 [10:18:04<19:21:19, 3.47it/s] 35%|███▍ | 129574/371472 [10:18:04<19:29:50, 3.45it/s] 35%|███▍ | 129575/371472 [10:18:05<19:04:33, 3.52it/s] 35%|███▍ | 129576/371472 [10:18:05<19:17:15, 3.48it/s] 35%|███▍ | 129577/371472 [10:18:05<18:13:13, 3.69it/s] 35%|███▍ | 129578/371472 [10:18:05<18:06:17, 3.71it/s] 35%|███▍ | 129579/371472 [10:18:06<17:57:20, 3.74it/s] 35%|███▍ | 129580/371472 [10:18:06<17:58:52, 3.74it/s] {'loss': 3.3175, 'learning_rate': 6.863701006270336e-07, 'epoch': 5.58} + 35%|███▍ | 129580/371472 [10:18:06<17:58:52, 3.74it/s] 35%|███▍ | 129581/371472 [10:18:06<18:05:53, 3.71it/s] 35%|███▍ | 129582/371472 [10:18:07<18:40:08, 3.60it/s] 35%|███▍ | 129583/371472 [10:18:07<18:08:59, 3.70it/s] 35%|███▍ | 129584/371472 [10:18:07<18:33:03, 3.62it/s] 35%|███▍ | 129585/371472 [10:18:07<17:48:46, 3.77it/s] 35%|███▍ | 129586/371472 [10:18:08<18:01:49, 3.73it/s] 35%|███▍ | 129587/371472 [10:18:08<18:52:42, 3.56it/s] 35%|███▍ | 129588/371472 [10:18:08<19:34:14, 3.43it/s] 35%|███▍ | 129589/371472 [10:18:09<18:38:14, 3.61it/s] 35%|███▍ | 129590/371472 [10:18:09<19:11:04, 3.50it/s] 35%|███▍ | 129591/371472 [10:18:09<19:05:09, 3.52it/s] 35%|███▍ | 129592/371472 [10:18:09<18:39:43, 3.60it/s] 35%|███▍ | 129593/371472 [10:18:10<18:00:56, 3.73it/s] 35%|███▍ | 129594/371472 [10:18:10<17:49:22, 3.77it/s] 35%|███▍ | 129595/371472 [10:18:10<18:13:32, 3.69it/s] 35%|███▍ | 129596/371472 [10:18:10<18:16:44, 3.68it/s] 35%|███▍ | 129597/371472 [10:18:11<17:37:18, 3.81it/s] 35%|███▍ | 129598/371472 [10:18:11<17:22:06, 3.87it/s] 35%|███▍ | 129599/371472 [10:18:11<17:52:13, 3.76it/s] 35%|███▍ | 129600/371472 [10:18:12<20:05:51, 3.34it/s] {'loss': 3.3308, 'learning_rate': 6.863216186515546e-07, 'epoch': 5.58} + 35%|███▍ | 129600/371472 [10:18:12<20:05:51, 3.34it/s] 35%|███▍ | 129601/371472 [10:18:12<20:26:25, 3.29it/s] 35%|███▍ | 129602/371472 [10:18:12<19:24:23, 3.46it/s] 35%|███▍ | 129603/371472 [10:18:12<18:53:16, 3.56it/s] 35%|███▍ | 129604/371472 [10:18:13<19:45:34, 3.40it/s] 35%|███▍ | 129605/371472 [10:18:13<19:43:39, 3.41it/s] 35%|███▍ | 129606/371472 [10:18:13<19:05:39, 3.52it/s] 35%|███▍ | 129607/371472 [10:18:14<20:14:15, 3.32it/s] 35%|███▍ | 129608/371472 [10:18:14<21:15:27, 3.16it/s] 35%|███▍ | 129609/371472 [10:18:14<19:44:51, 3.40it/s] 35%|███▍ | 129610/371472 [10:18:14<18:55:08, 3.55it/s] 35%|███▍ | 129611/371472 [10:18:15<19:41:22, 3.41it/s] 35%|███▍ | 129612/371472 [10:18:15<20:46:36, 3.23it/s] 35%|███▍ | 129613/371472 [10:18:15<19:41:25, 3.41it/s] 35%|███▍ | 129614/371472 [10:18:16<19:49:08, 3.39it/s] 35%|███▍ | 129615/371472 [10:18:16<20:37:01, 3.26it/s] 35%|███▍ | 129616/371472 [10:18:16<20:11:14, 3.33it/s] 35%|███▍ | 129617/371472 [10:18:17<20:04:12, 3.35it/s] 35%|███▍ | 129618/371472 [10:18:17<19:46:06, 3.40it/s] 35%|███▍ | 129619/371472 [10:18:17<18:41:52, 3.59it/s] 35%|███▍ | 129620/371472 [10:18:17<18:24:26, 3.65it/s] {'loss': 3.269, 'learning_rate': 6.862731366760757e-07, 'epoch': 5.58} + 35%|███▍ | 129620/371472 [10:18:17<18:24:26, 3.65it/s] 35%|███▍ | 129621/371472 [10:18:18<19:20:13, 3.47it/s] 35%|███▍ | 129622/371472 [10:18:18<19:45:33, 3.40it/s] 35%|███▍ | 129623/371472 [10:18:18<19:42:36, 3.41it/s] 35%|███▍ | 129624/371472 [10:18:19<19:23:16, 3.47it/s] 35%|███▍ | 129625/371472 [10:18:19<19:05:18, 3.52it/s] 35%|███▍ | 129626/371472 [10:18:19<18:31:06, 3.63it/s] 35%|███▍ | 129627/371472 [10:18:19<18:37:43, 3.61it/s] 35%|███▍ | 129628/371472 [10:18:20<19:06:44, 3.51it/s] 35%|███▍ | 129629/371472 [10:18:20<18:55:15, 3.55it/s] 35%|███▍ | 129630/371472 [10:18:20<18:57:58, 3.54it/s] 35%|███▍ | 129631/371472 [10:18:21<19:37:26, 3.42it/s] 35%|███▍ | 129632/371472 [10:18:21<19:33:16, 3.44it/s] 35%|███▍ | 129633/371472 [10:18:21<18:55:07, 3.55it/s] 35%|███▍ | 129634/371472 [10:18:21<18:11:01, 3.69it/s] 35%|███▍ | 129635/371472 [10:18:22<17:34:39, 3.82it/s] 35%|███▍ | 129636/371472 [10:18:22<17:08:52, 3.92it/s] 35%|███▍ | 129637/371472 [10:18:22<18:42:04, 3.59it/s] 35%|███▍ | 129638/371472 [10:18:23<19:23:20, 3.46it/s] 35%|███▍ | 129639/371472 [10:18:23<21:08:41, 3.18it/s] 35%|███▍ | 129640/371472 [10:18:23<20:24:17, 3.29it/s] {'loss': 3.404, 'learning_rate': 6.862246547005969e-07, 'epoch': 5.58} + 35%|███▍ | 129640/371472 [10:18:23<20:24:17, 3.29it/s] 35%|███▍ | 129641/371472 [10:18:23<20:21:03, 3.30it/s] 35%|███▍ | 129642/371472 [10:18:24<19:29:31, 3.45it/s] 35%|███▍ | 129643/371472 [10:18:24<19:46:04, 3.40it/s] 35%|███▍ | 129644/371472 [10:18:24<18:57:37, 3.54it/s] 35%|███▍ | 129645/371472 [10:18:25<18:52:05, 3.56it/s] 35%|███▍ | 129646/371472 [10:18:25<19:29:01, 3.45it/s] 35%|███▍ | 129647/371472 [10:18:25<18:47:22, 3.58it/s] 35%|███▍ | 129648/371472 [10:18:25<18:17:50, 3.67it/s] 35%|███▍ | 129649/371472 [10:18:26<19:39:48, 3.42it/s] 35%|███▍ | 129650/371472 [10:18:26<19:14:06, 3.49it/s] 35%|███▍ | 129651/371472 [10:18:26<19:37:08, 3.42it/s] 35%|███▍ | 129652/371472 [10:18:27<19:16:47, 3.48it/s] 35%|███▍ | 129653/371472 [10:18:27<19:01:12, 3.53it/s] 35%|███▍ | 129654/371472 [10:18:27<18:42:15, 3.59it/s] 35%|███▍ | 129655/371472 [10:18:27<18:56:36, 3.55it/s] 35%|███▍ | 129656/371472 [10:18:28<18:33:44, 3.62it/s] 35%|███▍ | 129657/371472 [10:18:28<18:05:29, 3.71it/s] 35%|███▍ | 129658/371472 [10:18:28<18:06:06, 3.71it/s] 35%|███▍ | 129659/371472 [10:18:28<17:42:05, 3.79it/s] 35%|███▍ | 129660/371472 [10:18:29<18:11:31, 3.69it/s] {'loss': 3.3046, 'learning_rate': 6.86176172725118e-07, 'epoch': 5.58} + 35%|███▍ | 129660/371472 [10:18:29<18:11:31, 3.69it/s] 35%|███▍ | 129661/371472 [10:18:29<17:51:02, 3.76it/s] 35%|███▍ | 129662/371472 [10:18:29<17:41:10, 3.80it/s] 35%|███▍ | 129663/371472 [10:18:30<18:27:25, 3.64it/s] 35%|███▍ | 129664/371472 [10:18:30<18:11:56, 3.69it/s] 35%|███▍ | 129665/371472 [10:18:30<18:01:53, 3.73it/s] 35%|███▍ | 129666/371472 [10:18:30<18:32:48, 3.62it/s] 35%|███▍ | 129667/371472 [10:18:31<18:01:32, 3.73it/s] 35%|███▍ | 129668/371472 [10:18:31<17:17:11, 3.89it/s] 35%|███▍ | 129669/371472 [10:18:31<17:43:12, 3.79it/s] 35%|███▍ | 129670/371472 [10:18:31<17:48:13, 3.77it/s] 35%|███▍ | 129671/371472 [10:18:32<17:49:44, 3.77it/s] 35%|███▍ | 129672/371472 [10:18:32<19:08:14, 3.51it/s] 35%|███▍ | 129673/371472 [10:18:32<18:29:34, 3.63it/s] 35%|███▍ | 129674/371472 [10:18:32<17:44:04, 3.79it/s] 35%|███▍ | 129675/371472 [10:18:33<18:52:17, 3.56it/s] 35%|███▍ | 129676/371472 [10:18:33<19:07:19, 3.51it/s] 35%|███▍ | 129677/371472 [10:18:33<19:11:05, 3.50it/s] 35%|███▍ | 129678/371472 [10:18:34<18:43:30, 3.59it/s] 35%|███▍ | 129679/371472 [10:18:34<18:04:41, 3.72it/s] 35%|███▍ | 129680/371472 [10:18:34<17:56:19, 3.74it/s] {'loss': 3.153, 'learning_rate': 6.86127690749639e-07, 'epoch': 5.59} + 35%|███▍ | 129680/371472 [10:18:34<17:56:19, 3.74it/s] 35%|███▍ | 129681/371472 [10:18:34<18:04:53, 3.71it/s] 35%|███▍ | 129682/371472 [10:18:35<17:35:33, 3.82it/s] 35%|███▍ | 129683/371472 [10:18:35<16:57:59, 3.96it/s] 35%|███▍ | 129684/371472 [10:18:35<16:25:54, 4.09it/s] 35%|███▍ | 129685/371472 [10:18:35<17:22:36, 3.87it/s] 35%|███▍ | 129686/371472 [10:18:36<18:34:02, 3.62it/s] 35%|███▍ | 129687/371472 [10:18:36<18:54:14, 3.55it/s] 35%|███▍ | 129688/371472 [10:18:36<19:02:44, 3.53it/s] 35%|███▍ | 129689/371472 [10:18:37<20:15:44, 3.31it/s] 35%|███▍ | 129690/371472 [10:18:37<19:27:50, 3.45it/s] 35%|███▍ | 129691/371472 [10:18:37<19:44:19, 3.40it/s] 35%|███▍ | 129692/371472 [10:18:38<19:40:53, 3.41it/s] 35%|███▍ | 129693/371472 [10:18:38<19:13:56, 3.49it/s] 35%|███▍ | 129694/371472 [10:18:38<18:31:57, 3.62it/s] 35%|███▍ | 129695/371472 [10:18:38<18:49:36, 3.57it/s] 35%|███▍ | 129696/371472 [10:18:39<18:48:00, 3.57it/s] 35%|███▍ | 129697/371472 [10:18:39<18:24:48, 3.65it/s] 35%|███▍ | 129698/371472 [10:18:39<17:58:59, 3.73it/s] 35%|███▍ | 129699/371472 [10:18:39<17:22:13, 3.87it/s] 35%|███▍ | 129700/371472 [10:18:40<17:06:37, 3.93it/s] {'loss': 3.0212, 'learning_rate': 6.860792087741601e-07, 'epoch': 5.59} + 35%|███▍ | 129700/371472 [10:18:40<17:06:37, 3.93it/s] 35%|███▍ | 129701/371472 [10:18:40<17:02:50, 3.94it/s] 35%|███▍ | 129702/371472 [10:18:40<16:34:47, 4.05it/s] 35%|███▍ | 129703/371472 [10:18:40<17:13:33, 3.90it/s] 35%|███▍ | 129704/371472 [10:18:41<18:08:19, 3.70it/s] 35%|███▍ | 129705/371472 [10:18:41<19:15:38, 3.49it/s] 35%|███▍ | 129706/371472 [10:18:41<19:51:29, 3.38it/s] 35%|███▍ | 129707/371472 [10:18:42<19:49:16, 3.39it/s] 35%|███▍ | 129708/371472 [10:18:42<20:54:07, 3.21it/s] 35%|███▍ | 129709/371472 [10:18:42<19:23:54, 3.46it/s] 35%|███▍ | 129710/371472 [10:18:42<18:57:39, 3.54it/s] 35%|███▍ | 129711/371472 [10:18:43<18:21:16, 3.66it/s] 35%|███▍ | 129712/371472 [10:18:43<19:49:40, 3.39it/s] 35%|███▍ | 129713/371472 [10:18:43<20:18:35, 3.31it/s] 35%|███▍ | 129714/371472 [10:18:44<20:17:45, 3.31it/s] 35%|███▍ | 129715/371472 [10:18:44<20:02:20, 3.35it/s] 35%|███▍ | 129716/371472 [10:18:44<18:59:24, 3.54it/s] 35%|███▍ | 129717/371472 [10:18:44<18:29:33, 3.63it/s] 35%|███▍ | 129718/371472 [10:18:45<18:12:10, 3.69it/s] 35%|███▍ | 129719/371472 [10:18:45<18:41:28, 3.59it/s] 35%|███▍ | 129720/371472 [10:18:45<19:53:54, 3.37it/s] {'loss': 3.0432, 'learning_rate': 6.860307267986813e-07, 'epoch': 5.59} + 35%|███▍ | 129720/371472 [10:18:45<19:53:54, 3.37it/s] 35%|███▍ | 129721/371472 [10:18:46<19:14:04, 3.49it/s] 35%|███▍ | 129722/371472 [10:18:46<18:44:32, 3.58it/s] 35%|███▍ | 129723/371472 [10:18:46<17:59:25, 3.73it/s] 35%|███▍ | 129724/371472 [10:18:46<18:39:31, 3.60it/s] 35%|███▍ | 129725/371472 [10:18:47<17:57:02, 3.74it/s] 35%|███▍ | 129726/371472 [10:18:47<18:05:56, 3.71it/s] 35%|███▍ | 129727/371472 [10:18:47<17:29:59, 3.84it/s] 35%|███▍ | 129728/371472 [10:18:47<17:59:38, 3.73it/s] 35%|███▍ | 129729/371472 [10:18:48<19:04:24, 3.52it/s] 35%|███▍ | 129730/371472 [10:18:48<18:17:43, 3.67it/s] 35%|███▍ | 129731/371472 [10:18:48<17:57:10, 3.74it/s] 35%|███▍ | 129732/371472 [10:18:49<18:13:04, 3.69it/s] 35%|███▍ | 129733/371472 [10:18:49<17:46:06, 3.78it/s] 35%|███▍ | 129734/371472 [10:18:49<18:11:04, 3.69it/s] 35%|███▍ | 129735/371472 [10:18:49<18:13:06, 3.69it/s] 35%|███▍ | 129736/371472 [10:18:50<18:28:21, 3.64it/s] 35%|███▍ | 129737/371472 [10:18:50<18:54:47, 3.55it/s] 35%|███▍ | 129738/371472 [10:18:50<19:50:15, 3.38it/s] 35%|███▍ | 129739/371472 [10:18:51<21:08:57, 3.17it/s] 35%|███▍ | 129740/371472 [10:18:51<21:00:48, 3.20it/s] {'loss': 3.3084, 'learning_rate': 6.859822448232025e-07, 'epoch': 5.59} + 35%|███▍ | 129740/371472 [10:18:51<21:00:48, 3.20it/s] 35%|███▍ | 129741/371472 [10:18:51<19:46:43, 3.39it/s] 35%|███▍ | 129742/371472 [10:18:52<28:18:27, 2.37it/s] 35%|███▍ | 129743/371472 [10:18:52<24:46:57, 2.71it/s] 35%|███▍ | 129744/371472 [10:18:52<23:28:03, 2.86it/s] 35%|███▍ | 129745/371472 [10:18:53<21:43:42, 3.09it/s] 35%|███▍ | 129746/371472 [10:18:53<20:55:10, 3.21it/s] 35%|███▍ | 129747/371472 [10:18:53<20:15:06, 3.32it/s] 35%|███▍ | 129748/371472 [10:18:54<19:24:47, 3.46it/s] 35%|███▍ | 129749/371472 [10:18:54<19:38:10, 3.42it/s] 35%|███▍ | 129750/371472 [10:18:54<19:01:33, 3.53it/s] 35%|███▍ | 129751/371472 [10:18:54<18:14:58, 3.68it/s] 35%|███▍ | 129752/371472 [10:18:55<18:17:15, 3.67it/s] 35%|███▍ | 129753/371472 [10:18:55<17:53:20, 3.75it/s] 35%|███▍ | 129754/371472 [10:18:55<22:01:38, 3.05it/s] 35%|███▍ | 129755/371472 [10:18:56<22:21:50, 3.00it/s] 35%|███▍ | 129756/371472 [10:18:56<21:56:41, 3.06it/s] 35%|███▍ | 129757/371472 [10:18:56<21:23:01, 3.14it/s] 35%|███▍ | 129758/371472 [10:18:57<20:00:12, 3.36it/s] 35%|███▍ | 129759/371472 [10:18:57<18:51:05, 3.56it/s] 35%|███▍ | 129760/371472 [10:18:57<18:32:06, 3.62it/s] {'loss': 3.1186, 'learning_rate': 6.859337628477234e-07, 'epoch': 5.59} + 35%|███▍ | 129760/371472 [10:18:57<18:32:06, 3.62it/s] 35%|███▍ | 129761/371472 [10:18:57<19:17:38, 3.48it/s] 35%|███▍ | 129762/371472 [10:18:58<19:36:46, 3.42it/s] 35%|███▍ | 129763/371472 [10:18:58<18:41:06, 3.59it/s] 35%|███▍ | 129764/371472 [10:18:58<18:25:19, 3.64it/s] 35%|███▍ | 129765/371472 [10:18:59<19:48:35, 3.39it/s] 35%|███▍ | 129766/371472 [10:18:59<20:56:58, 3.20it/s] 35%|███▍ | 129767/371472 [10:18:59<20:00:40, 3.36it/s] 35%|███▍ | 129768/371472 [10:18:59<18:55:03, 3.55it/s] 35%|███▍ | 129769/371472 [10:19:00<18:33:25, 3.62it/s] 35%|███▍ | 129770/371472 [10:19:00<20:04:50, 3.34it/s] 35%|███▍ | 129771/371472 [10:19:00<19:09:04, 3.51it/s] 35%|███▍ | 129772/371472 [10:19:01<18:30:58, 3.63it/s] 35%|███▍ | 129773/371472 [10:19:01<17:37:25, 3.81it/s] 35%|███▍ | 129774/371472 [10:19:01<17:22:58, 3.86it/s] 35%|███▍ | 129775/371472 [10:19:01<17:09:42, 3.91it/s] 35%|███▍ | 129776/371472 [10:19:02<17:20:28, 3.87it/s] 35%|███▍ | 129777/371472 [10:19:02<17:49:16, 3.77it/s] 35%|███▍ | 129778/371472 [10:19:02<18:33:04, 3.62it/s] 35%|███▍ | 129779/371472 [10:19:02<18:06:03, 3.71it/s] 35%|███▍ | 129780/371472 [10:19:03<17:48:48, 3.77it/s] {'loss': 3.1652, 'learning_rate': 6.858852808722446e-07, 'epoch': 5.59} + 35%|███▍ | 129780/371472 [10:19:03<17:48:48, 3.77it/s] 35%|███▍ | 129781/371472 [10:19:03<18:39:48, 3.60it/s] 35%|███▍ | 129782/371472 [10:19:03<19:15:04, 3.49it/s] 35%|███▍ | 129783/371472 [10:19:04<18:57:49, 3.54it/s] 35%|███▍ | 129784/371472 [10:19:04<18:44:39, 3.58it/s] 35%|███▍ | 129785/371472 [10:19:04<19:05:21, 3.52it/s] 35%|███▍ | 129786/371472 [10:19:04<19:18:46, 3.48it/s] 35%|███▍ | 129787/371472 [10:19:05<18:49:49, 3.57it/s] 35%|███▍ | 129788/371472 [10:19:05<18:43:10, 3.59it/s] 35%|███▍ | 129789/371472 [10:19:05<18:07:29, 3.70it/s] 35%|███▍ | 129790/371472 [10:19:05<18:12:59, 3.69it/s] 35%|███▍ | 129791/371472 [10:19:06<17:54:10, 3.75it/s] 35%|███▍ | 129792/371472 [10:19:06<18:16:47, 3.67it/s] 35%|███▍ | 129793/371472 [10:19:06<18:59:54, 3.53it/s] 35%|███▍ | 129794/371472 [10:19:07<19:23:42, 3.46it/s] 35%|███▍ | 129795/371472 [10:19:07<19:09:57, 3.50it/s] 35%|█���█▍ | 129796/371472 [10:19:07<19:53:45, 3.37it/s] 35%|███▍ | 129797/371472 [10:19:07<19:25:32, 3.46it/s] 35%|███▍ | 129798/371472 [10:19:08<19:11:42, 3.50it/s] 35%|███▍ | 129799/371472 [10:19:08<20:26:39, 3.28it/s] 35%|███▍ | 129800/371472 [10:19:09<23:38:32, 2.84it/s] {'loss': 3.161, 'learning_rate': 6.858367988967657e-07, 'epoch': 5.59} + 35%|███▍ | 129800/371472 [10:19:09<23:38:32, 2.84it/s] 35%|███▍ | 129801/371472 [10:19:09<21:42:48, 3.09it/s] 35%|███▍ | 129802/371472 [10:19:09<21:13:01, 3.16it/s] 35%|███▍ | 129803/371472 [10:19:09<20:30:27, 3.27it/s] 35%|███▍ | 129804/371472 [10:19:10<19:39:57, 3.41it/s] 35%|███▍ | 129805/371472 [10:19:10<19:17:55, 3.48it/s] 35%|███▍ | 129806/371472 [10:19:10<20:09:43, 3.33it/s] 35%|███▍ | 129807/371472 [10:19:11<19:33:04, 3.43it/s] 35%|███▍ | 129808/371472 [10:19:11<19:23:42, 3.46it/s] 35%|███▍ | 129809/371472 [10:19:11<19:24:47, 3.46it/s] 35%|███▍ | 129810/371472 [10:19:11<19:12:12, 3.50it/s] 35%|███▍ | 129811/371472 [10:19:12<20:04:30, 3.34it/s] 35%|███▍ | 129812/371472 [10:19:12<19:10:39, 3.50it/s] 35%|███▍ | 129813/371472 [10:19:12<18:28:44, 3.63it/s] 35%|███▍ | 129814/371472 [10:19:12<18:05:04, 3.71it/s] 35%|███▍ | 129815/371472 [10:19:13<17:59:34, 3.73it/s] 35%|███▍ | 129816/371472 [10:19:13<18:56:35, 3.54it/s] 35%|███▍ | 129817/371472 [10:19:13<19:12:09, 3.50it/s] 35%|███▍ | 129818/371472 [10:19:14<19:08:42, 3.51it/s] 35%|███▍ | 129819/371472 [10:19:14<18:47:01, 3.57it/s] 35%|███▍ | 129820/371472 [10:19:14<18:03:47, 3.72it/s] {'loss': 3.2219, 'learning_rate': 6.857883169212869e-07, 'epoch': 5.59} + 35%|███▍ | 129820/371472 [10:19:14<18:03:47, 3.72it/s] 35%|███▍ | 129821/371472 [10:19:14<17:58:33, 3.73it/s] 35%|███▍ | 129822/371472 [10:19:15<17:36:12, 3.81it/s] 35%|███▍ | 129823/371472 [10:19:15<19:05:41, 3.52it/s] 35%|███▍ | 129824/371472 [10:19:15<19:04:48, 3.52it/s] 35%|███▍ | 129825/371472 [10:19:16<18:50:07, 3.56it/s] 35%|███▍ | 129826/371472 [10:19:16<18:50:11, 3.56it/s] 35%|███▍ | 129827/371472 [10:19:16<19:26:44, 3.45it/s] 35%|███▍ | 129828/371472 [10:19:16<18:38:58, 3.60it/s] 35%|███▍ | 129829/371472 [10:19:17<18:24:13, 3.65it/s] 35%|███▍ | 129830/371472 [10:19:17<18:56:23, 3.54it/s] 35%|███▍ | 129831/371472 [10:19:17<18:55:34, 3.55it/s] 35%|███▍ | 129832/371472 [10:19:17<18:03:30, 3.72it/s] 35%|███▍ | 129833/371472 [10:19:18<18:18:39, 3.67it/s] 35%|███▍ | 129834/371472 [10:19:18<18:09:12, 3.70it/s] 35%|███▍ | 129835/371472 [10:19:18<18:17:57, 3.67it/s] 35%|███▍ | 129836/371472 [10:19:19<20:01:06, 3.35it/s] 35%|███▍ | 129837/371472 [10:19:19<20:26:58, 3.28it/s] 35%|███▍ | 129838/371472 [10:19:19<19:48:56, 3.39it/s] 35%|███▍ | 129839/371472 [10:19:20<19:32:58, 3.43it/s] 35%|███▍ | 129840/371472 [10:19:20<19:04:18, 3.52it/s] {'loss': 3.2173, 'learning_rate': 6.857398349458078e-07, 'epoch': 5.59} + 35%|███▍ | 129840/371472 [10:19:20<19:04:18, 3.52it/s] 35%|███▍ | 129841/371472 [10:19:20<18:27:45, 3.64it/s] 35%|███▍ | 129842/371472 [10:19:20<18:13:37, 3.68it/s] 35%|███▍ | 129843/371472 [10:19:21<18:45:45, 3.58it/s] 35%|███▍ | 129844/371472 [10:19:21<18:43:28, 3.58it/s] 35%|███▍ | 129845/371472 [10:19:21<18:33:17, 3.62it/s] 35%|███▍ | 129846/371472 [10:19:21<19:31:50, 3.44it/s] 35%|███▍ | 129847/371472 [10:19:22<19:55:45, 3.37it/s] 35%|███▍ | 129848/371472 [10:19:22<18:37:07, 3.60it/s] 35%|███▍ | 129849/371472 [10:19:22<18:09:21, 3.70it/s] 35%|███▍ | 129850/371472 [10:19:23<18:27:14, 3.64it/s] 35%|███▍ | 129851/371472 [10:19:23<18:32:47, 3.62it/s] 35%|███▍ | 129852/371472 [10:19:23<18:09:27, 3.70it/s] 35%|███▍ | 129853/371472 [10:19:23<18:05:28, 3.71it/s] 35%|███▍ | 129854/371472 [10:19:24<17:57:59, 3.74it/s] 35%|███▍ | 129855/371472 [10:19:24<18:58:16, 3.54it/s] 35%|███▍ | 129856/371472 [10:19:24<18:58:45, 3.54it/s] 35%|███▍ | 129857/371472 [10:19:25<19:11:15, 3.50it/s] 35%|███▍ | 129858/371472 [10:19:25<19:12:21, 3.49it/s] 35%|███▍ | 129859/371472 [10:19:25<19:01:14, 3.53it/s] 35%|███▍ | 129860/371472 [10:19:25<19:03:35, 3.52it/s] {'loss': 3.1586, 'learning_rate': 6.85691352970329e-07, 'epoch': 5.59} + 35%|███▍ | 129860/371472 [10:19:25<19:03:35, 3.52it/s] 35%|███▍ | 129861/371472 [10:19:26<20:40:03, 3.25it/s] 35%|███▍ | 129862/371472 [10:19:26<19:53:45, 3.37it/s] 35%|███▍ | 129863/371472 [10:19:26<19:31:19, 3.44it/s] 35%|███▍ | 129864/371472 [10:19:27<19:00:23, 3.53it/s] 35%|███▍ | 129865/371472 [10:19:27<19:09:33, 3.50it/s] 35%|███▍ | 129866/371472 [10:19:27<19:01:14, 3.53it/s] 35%|███▍ | 129867/371472 [10:19:27<18:48:58, 3.57it/s] 35%|███▍ | 129868/371472 [10:19:28<19:27:04, 3.45it/s] 35%|███▍ | 129869/371472 [10:19:28<20:03:24, 3.35it/s] 35%|███▍ | 129870/371472 [10:19:28<19:56:58, 3.36it/s] 35%|███▍ | 129871/371472 [10:19:29<19:27:57, 3.45it/s] 35%|███▍ | 129872/371472 [10:19:29<20:39:04, 3.25it/s] 35%|███▍ | 129873/371472 [10:19:29<20:12:20, 3.32it/s] 35%|███▍ | 129874/371472 [10:19:30<19:52:01, 3.38it/s] 35%|███▍ | 129875/371472 [10:19:30<23:51:40, 2.81it/s] 35%|███▍ | 129876/371472 [10:19:30<22:18:00, 3.01it/s] 35%|███▍ | 129877/371472 [10:19:31<20:50:41, 3.22it/s] 35%|███▍ | 129878/371472 [10:19:31<19:57:19, 3.36it/s] 35%|███▍ | 129879/371472 [10:19:31<20:42:25, 3.24it/s] 35%|███▍ | 129880/371472 [10:19:31<19:51:52, 3.38it/s] {'loss': 3.0959, 'learning_rate': 6.856428709948502e-07, 'epoch': 5.59} + 35%|███▍ | 129880/371472 [10:19:31<19:51:52, 3.38it/s] 35%|███▍ | 129881/371472 [10:19:32<18:49:31, 3.56it/s] 35%|███▍ | 129882/371472 [10:19:32<19:07:55, 3.51it/s] 35%|███▍ | 129883/371472 [10:19:32<20:48:52, 3.22it/s] 35%|███▍ | 129884/371472 [10:19:33<20:51:08, 3.22it/s] 35%|███▍ | 129885/371472 [10:19:33<19:18:02, 3.48it/s] 35%|███▍ | 129886/371472 [10:19:33<19:04:19, 3.52it/s] 35%|███▍ | 129887/371472 [10:19:33<19:49:51, 3.38it/s] 35%|███▍ | 129888/371472 [10:19:34<19:26:43, 3.45it/s] 35%|███▍ | 129889/371472 [10:19:34<18:40:12, 3.59it/s] 35%|███▍ | 129890/371472 [10:19:34<19:15:32, 3.48it/s] 35%|███▍ | 129891/371472 [10:19:35<18:23:42, 3.65it/s] 35%|███▍ | 129892/371472 [10:19:35<18:13:26, 3.68it/s] 35%|███▍ | 129893/371472 [10:19:35<19:59:54, 3.36it/s] 35%|███▍ | 129894/371472 [10:19:36<20:23:31, 3.29it/s] 35%|███▍ | 129895/371472 [10:19:36<21:27:46, 3.13it/s] 35%|███▍ | 129896/371472 [10:19:36<20:44:59, 3.23it/s] 35%|███▍ | 129897/371472 [10:19:36<20:53:24, 3.21it/s] 35%|███▍ | 129898/371472 [10:19:37<19:51:17, 3.38it/s] 35%|███▍ | 129899/371472 [10:19:37<21:20:33, 3.14it/s] 35%|███▍ | 129900/371472 [10:19:37<20:35:45, 3.26it/s] {'loss': 3.1031, 'learning_rate': 6.855943890193712e-07, 'epoch': 5.6} + 35%|███▍ | 129900/371472 [10:19:37<20:35:45, 3.26it/s] 35%|███▍ | 129901/371472 [10:19:38<21:36:29, 3.11it/s] 35%|███▍ | 129902/371472 [10:19:38<20:01:58, 3.35it/s] 35%|███▍ | 129903/371472 [10:19:38<19:17:37, 3.48it/s] 35%|███▍ | 129904/371472 [10:19:38<18:30:57, 3.62it/s] 35%|███▍ | 129905/371472 [10:19:39<18:43:02, 3.58it/s] 35%|███▍ | 129906/371472 [10:19:39<18:12:12, 3.69it/s] 35%|███▍ | 129907/371472 [10:19:39<18:33:31, 3.62it/s] 35%|███▍ | 129908/371472 [10:19:40<18:38:49, 3.60it/s] 35%|███▍ | 129909/371472 [10:19:40<19:44:17, 3.40it/s] 35%|███▍ | 129910/371472 [10:19:40<19:18:15, 3.48it/s] 35%|███▍ | 129911/371472 [10:19:40<19:03:03, 3.52it/s] 35%|███▍ | 129912/371472 [10:19:41<18:41:56, 3.59it/s] 35%|███▍ | 129913/371472 [10:19:41<18:11:23, 3.69it/s] 35%|███▍ | 129914/371472 [10:19:41<17:30:05, 3.83it/s] 35%|███▍ | 129915/371472 [10:19:41<17:32:06, 3.83it/s] 35%|███▍ | 129916/371472 [10:19:42<18:05:27, 3.71it/s] 35%|███▍ | 129917/371472 [10:19:42<18:09:32, 3.70it/s] 35%|███▍ | 129918/371472 [10:19:42<17:54:02, 3.75it/s] 35%|███▍ | 129919/371472 [10:19:43<17:52:26, 3.75it/s] 35%|███▍ | 129920/371472 [10:19:43<17:26:58, 3.85it/s] {'loss': 3.189, 'learning_rate': 6.855459070438922e-07, 'epoch': 5.6} + 35%|███▍ | 129920/371472 [10:19:43<17:26:58, 3.85it/s] 35%|███▍ | 129921/371472 [10:19:43<17:16:12, 3.89it/s] 35%|███▍ | 129922/371472 [10:19:43<17:26:14, 3.85it/s] 35%|███▍ | 129923/371472 [10:19:44<17:38:00, 3.81it/s] 35%|███▍ | 129924/371472 [10:19:44<18:23:59, 3.65it/s] 35%|███▍ | 129925/371472 [10:19:44<17:36:59, 3.81it/s] 35%|███▍ | 129926/371472 [10:19:44<18:17:25, 3.67it/s] 35%|███▍ | 129927/371472 [10:19:45<18:06:10, 3.71it/s] 35%|███▍ | 129928/371472 [10:19:45<17:28:08, 3.84it/s] 35%|███▍ | 129929/371472 [10:19:45<18:59:50, 3.53it/s] 35%|███▍ | 129930/371472 [10:19:46<18:35:53, 3.61it/s] 35%|███▍ | 129931/371472 [10:19:46<21:34:58, 3.11it/s] 35%|███▍ | 129932/371472 [10:19:46<21:37:49, 3.10it/s] 35%|███▍ | 129933/371472 [10:19:47<21:42:02, 3.09it/s] 35%|███▍ | 129934/371472 [10:19:47<21:58:59, 3.05it/s] 35%|███▍ | 129935/371472 [10:19:47<21:01:54, 3.19it/s] 35%|███▍ | 129936/371472 [10:19:48<22:41:08, 2.96it/s] 35%|███▍ | 129937/371472 [10:19:48<21:06:09, 3.18it/s] 35%|███▍ | 129938/371472 [10:19:48<20:13:54, 3.32it/s] 35%|███▍ | 129939/371472 [10:19:48<19:34:19, 3.43it/s] 35%|███▍ | 129940/371472 [10:19:49<21:00:37, 3.19it/s] {'loss': 3.1366, 'learning_rate': 6.854974250684134e-07, 'epoch': 5.6} + 35%|███▍ | 129940/371472 [10:19:49<21:00:37, 3.19it/s] 35%|███▍ | 129941/371472 [10:19:49<20:56:34, 3.20it/s] 35%|███▍ | 129942/371472 [10:19:49<20:09:51, 3.33it/s] 35%|███▍ | 129943/371472 [10:19:50<20:30:13, 3.27it/s] 35%|███▍ | 129944/371472 [10:19:50<21:36:55, 3.10it/s] 35%|███▍ | 129945/371472 [10:19:50<20:25:56, 3.28it/s] 35%|███▍ | 129946/371472 [10:19:51<19:39:18, 3.41it/s] 35%|███▍ | 129947/371472 [10:19:51<20:11:33, 3.32it/s] 35%|███▍ | 129948/371472 [10:19:51<19:28:36, 3.44it/s] 35%|███▍ | 129949/371472 [10:19:52<20:59:45, 3.20it/s] 35%|███▍ | 129950/371472 [10:19:52<20:03:59, 3.34it/s] 35%|███▍ | 129951/371472 [10:19:52<19:29:39, 3.44it/s] 35%|███▍ | 129952/371472 [10:19:52<18:59:59, 3.53it/s] 35%|███▍ | 129953/371472 [10:19:53<20:33:35, 3.26it/s] 35%|███▍ | 129954/371472 [10:19:53<20:10:11, 3.33it/s] 35%|███▍ | 129955/371472 [10:19:53<20:09:22, 3.33it/s] 35%|███▍ | 129956/371472 [10:19:54<20:43:36, 3.24it/s] 35%|███▍ | 129957/371472 [10:19:54<20:08:19, 3.33it/s] 35%|███▍ | 129958/371472 [10:19:54<20:47:08, 3.23it/s] 35%|███▍ | 129959/371472 [10:19:54<19:16:26, 3.48it/s] 35%|███▍ | 129960/371472 [10:19:55<18:48:09, 3.57it/s] {'loss': 3.3031, 'learning_rate': 6.854489430929346e-07, 'epoch': 5.6} + 35%|███▍ | 129960/371472 [10:19:55<18:48:09, 3.57it/s] 35%|███▍ | 129961/371472 [10:19:55<19:30:49, 3.44it/s] 35%|███▍ | 129962/371472 [10:19:55<19:07:33, 3.51it/s] 35%|███▍ | 129963/371472 [10:19:56<18:35:51, 3.61it/s] 35%|███▍ | 129964/371472 [10:19:56<18:22:26, 3.65it/s] 35%|███▍ | 129965/371472 [10:19:56<18:40:28, 3.59it/s] 35%|███▍ | 129966/371472 [10:19:56<18:10:23, 3.69it/s] 35%|███▍ | 129967/371472 [10:19:57<17:54:02, 3.75it/s] 35%|███▍ | 129968/371472 [10:19:57<17:40:41, 3.79it/s] 35%|███▍ | 129969/371472 [10:19:57<18:01:01, 3.72it/s] 35%|███▍ | 129970/371472 [10:19:57<18:51:53, 3.56it/s] 35%|███▍ | 129971/371472 [10:19:58<18:58:10, 3.54it/s] 35%|███▍ | 129972/371472 [10:19:58<23:21:45, 2.87it/s] 35%|███▍ | 129973/371472 [10:19:59<21:11:50, 3.16it/s] 35%|███▍ | 129974/371472 [10:19:59<20:50:18, 3.22it/s] 35%|███▍ | 129975/371472 [10:19:59<19:41:06, 3.41it/s] 35%|███▍ | 129976/371472 [10:19:59<20:52:52, 3.21it/s] 35%|███▍ | 129977/371472 [10:20:00<20:26:07, 3.28it/s] 35%|███▍ | 129978/371472 [10:20:00<20:07:32, 3.33it/s] 35%|███▍ | 129979/371472 [10:20:00<19:27:49, 3.45it/s] 35%|███▍ | 129980/371472 [10:20:01<18:41:25, 3.59it/s] {'loss': 3.3899, 'learning_rate': 6.854004611174556e-07, 'epoch': 5.6} + 35%|███▍ | 129980/371472 [10:20:01<18:41:25, 3.59it/s] 35%|███▍ | 129981/371472 [10:20:01<18:32:40, 3.62it/s] 35%|███▍ | 129982/371472 [10:20:01<17:54:10, 3.75it/s] 35%|███▍ | 129983/371472 [10:20:01<18:02:10, 3.72it/s] 35%|███▍ | 129984/371472 [10:20:02<18:04:39, 3.71it/s] 35%|███▍ | 129985/371472 [10:20:02<17:29:46, 3.83it/s] 35%|███▍ | 129986/371472 [10:20:02<17:28:51, 3.84it/s] 35%|███▍ | 129987/371472 [10:20:02<18:17:52, 3.67it/s] 35%|███▍ | 129988/371472 [10:20:03<17:59:21, 3.73it/s] 35%|███▍ | 129989/371472 [10:20:03<19:22:48, 3.46it/s] 35%|███▍ | 129990/371472 [10:20:03<18:56:17, 3.54it/s] 35%|███▍ | 129991/371472 [10:20:03<18:09:49, 3.69it/s] 35%|███▍ | 129992/371472 [10:20:04<18:06:58, 3.70it/s] 35%|███▍ | 129993/371472 [10:20:04<18:32:22, 3.62it/s] 35%|███▍ | 129994/371472 [10:20:04<19:04:50, 3.52it/s] 35%|███▍ | 129995/371472 [10:20:05<19:29:09, 3.44it/s] 35%|███▍ | 129996/371472 [10:20:05<18:48:38, 3.57it/s] 35%|███▍ | 129997/371472 [10:20:05<17:58:46, 3.73it/s] 35%|███▍ | 129998/371472 [10:20:05<17:26:27, 3.85it/s] 35%|███▍ | 129999/371472 [10:20:06<17:28:07, 3.84it/s] 35%|███▍ | 130000/371472 [10:20:06<17:20:53, 3.87it/s] {'loss': 3.0167, 'learning_rate': 6.853519791419767e-07, 'epoch': 5.6} + 35%|███▍ | 130000/371472 [10:20:06<17:20:53, 3.87it/s] 35%|███▍ | 130001/371472 [10:20:06<17:12:44, 3.90it/s] 35%|███▍ | 130002/371472 [10:20:06<17:58:38, 3.73it/s] 35%|███▍ | 130003/371472 [10:20:07<19:24:53, 3.45it/s] 35%|███▍ | 130004/371472 [10:20:07<18:29:55, 3.63it/s] 35%|███▍ | 130005/371472 [10:20:07<18:00:00, 3.73it/s] 35%|███▍ | 130006/371472 [10:20:08<18:04:31, 3.71it/s] 35%|███▍ | 130007/371472 [10:20:08<21:25:08, 3.13it/s] 35%|███▍ | 130008/371472 [10:20:08<21:19:16, 3.15it/s] 35%|███▍ | 130009/371472 [10:20:09<21:06:36, 3.18it/s] 35%|███▍ | 130010/371472 [10:20:09<21:30:22, 3.12it/s] 35%|███▍ | 130011/371472 [10:20:09<22:13:27, 3.02it/s] 35%|███▍ | 130012/371472 [10:20:10<21:11:32, 3.16it/s] 35%|███▍ | 130013/371472 [10:20:10<19:47:37, 3.39it/s] 35%|███▍ | 130014/371472 [10:20:10<19:19:57, 3.47it/s] 35%|███▍ | 130015/371472 [10:20:10<19:57:52, 3.36it/s] 35%|███▌ | 130016/371472 [10:20:11<19:21:35, 3.46it/s] 35%|███▌ | 130017/371472 [10:20:11<19:03:14, 3.52it/s] 35%|███▌ | 130018/371472 [10:20:11<18:38:24, 3.60it/s] 35%|███▌ | 130019/371472 [10:20:11<18:16:15, 3.67it/s] 35%|███▌ | 130020/371472 [10:20:12<17:38:03, 3.80it/s] {'loss': 3.1078, 'learning_rate': 6.853034971664979e-07, 'epoch': 5.6} + 35%|███▌ | 130020/371472 [10:20:12<17:38:03, 3.80it/s] 35%|███▌ | 130021/371472 [10:20:12<19:21:52, 3.46it/s] 35%|███▌ | 130022/371472 [10:20:12<20:09:03, 3.33it/s] 35%|███▌ | 130023/371472 [10:20:13<19:13:33, 3.49it/s] 35%|███▌ | 130024/371472 [10:20:13<19:21:08, 3.47it/s] 35%|███▌ | 130025/371472 [10:20:13<18:29:38, 3.63it/s] 35%|███▌ | 130026/371472 [10:20:13<17:42:49, 3.79it/s] 35%|███▌ | 130027/371472 [10:20:14<18:33:42, 3.61it/s] 35%|███▌ | 130028/371472 [10:20:14<18:49:58, 3.56it/s] 35%|███▌ | 130029/371472 [10:20:14<18:48:24, 3.57it/s] 35%|███▌ | 130030/371472 [10:20:15<18:45:30, 3.58it/s] 35%|███▌ | 130031/371472 [10:20:15<18:35:12, 3.61it/s] 35%|███▌ | 130032/371472 [10:20:15<19:18:34, 3.47it/s] 35%|███▌ | 130033/371472 [10:20:15<18:47:54, 3.57it/s] 35%|███▌ | 130034/371472 [10:20:16<19:51:05, 3.38it/s] 35%|███▌ | 130035/371472 [10:20:16<18:45:43, 3.57it/s] 35%|███▌ | 130036/371472 [10:20:16<18:46:07, 3.57it/s] 35%|███▌ | 130037/371472 [10:20:17<19:01:01, 3.53it/s] 35%|███▌ | 130038/371472 [10:20:17<20:41:39, 3.24it/s] 35%|███▌ | 130039/371472 [10:20:17<19:59:04, 3.36it/s] 35%|███▌ | 130040/371472 [10:20:17<19:10:22, 3.50it/s] {'loss': 3.0785, 'learning_rate': 6.85255015191019e-07, 'epoch': 5.6} + 35%|███▌ | 130040/371472 [10:20:17<19:10:22, 3.50it/s] 35%|███▌ | 130041/371472 [10:20:18<21:10:07, 3.17it/s] 35%|███▌ | 130042/371472 [10:20:18<19:17:39, 3.48it/s] 35%|███▌ | 130043/371472 [10:20:18<18:34:25, 3.61it/s] 35%|███▌ | 130044/371472 [10:20:19<17:59:37, 3.73it/s] 35%|███▌ | 130045/371472 [10:20:19<17:56:57, 3.74it/s] 35%|███▌ | 130046/371472 [10:20:19<17:30:33, 3.83it/s] 35%|███▌ | 130047/371472 [10:20:19<17:00:00, 3.94it/s] 35%|███▌ | 130048/371472 [10:20:20<18:12:16, 3.68it/s] 35%|███▌ | 130049/371472 [10:20:20<18:35:35, 3.61it/s] 35%|███▌ | 130050/371472 [10:20:20<18:16:28, 3.67it/s] 35%|███▌ | 130051/371472 [10:20:20<18:13:18, 3.68it/s] 35%|███▌ | 130052/371472 [10:20:21<18:57:55, 3.54it/s] 35%|███▌ | 130053/371472 [10:20:21<18:43:39, 3.58it/s] 35%|███▌ | 130054/371472 [10:20:21<18:44:29, 3.58it/s] 35%|███▌ | 130055/371472 [10:20:22<18:49:31, 3.56it/s] 35%|███▌ | 130056/371472 [10:20:22<19:17:42, 3.48it/s] 35%|███▌ | 130057/371472 [10:20:22<19:12:17, 3.49it/s] 35%|███▌ | 130058/371472 [10:20:22<18:40:18, 3.59it/s] 35%|███▌ | 130059/371472 [10:20:23<19:36:27, 3.42it/s] 35%|███▌ | 130060/371472 [10:20:23<19:24:16, 3.46it/s] {'loss': 3.1983, 'learning_rate': 6.8520653321554e-07, 'epoch': 5.6} + 35%|███▌ | 130060/371472 [10:20:23<19:24:16, 3.46it/s] 35%|███▌ | 130061/371472 [10:20:23<20:16:17, 3.31it/s] 35%|███▌ | 130062/371472 [10:20:24<19:11:04, 3.50it/s] 35%|███▌ | 130063/371472 [10:20:24<18:57:44, 3.54it/s] 35%|███▌ | 130064/371472 [10:20:24<19:13:34, 3.49it/s] 35%|███▌ | 130065/371472 [10:20:25<19:11:23, 3.49it/s] 35%|███▌ | 130066/371472 [10:20:25<18:18:23, 3.66it/s] 35%|███▌ | 130067/371472 [10:20:25<18:31:34, 3.62it/s] 35%|███▌ | 130068/371472 [10:20:25<17:52:35, 3.75it/s] 35%|███▌ | 130069/371472 [10:20:26<18:29:06, 3.63it/s] 35%|███▌ | 130070/371472 [10:20:26<18:49:39, 3.56it/s] 35%|███▌ | 130071/371472 [10:20:26<20:13:18, 3.32it/s] 35%|███▌ | 130072/371472 [10:20:27<20:00:39, 3.35it/s] 35%|███▌ | 130073/371472 [10:20:27<20:31:40, 3.27it/s] 35%|███▌ | 130074/371472 [10:20:27<20:19:23, 3.30it/s] 35%|███▌ | 130075/371472 [10:20:27<19:45:43, 3.39it/s] 35%|███▌ | 130076/371472 [10:20:28<18:34:20, 3.61it/s] 35%|███▌ | 130077/371472 [10:20:28<18:08:54, 3.69it/s] 35%|███▌ | 130078/371472 [10:20:28<19:26:06, 3.45it/s] 35%|███▌ | 130079/371472 [10:20:29<20:45:00, 3.23it/s] 35%|███▌ | 130080/371472 [10:20:29<21:52:22, 3.07it/s] {'loss': 3.2296, 'learning_rate': 6.851580512400611e-07, 'epoch': 5.6} + 35%|███▌ | 130080/371472 [10:20:29<21:52:22, 3.07it/s] 35%|███▌ | 130081/371472 [10:20:29<20:36:21, 3.25it/s] 35%|███▌ | 130082/371472 [10:20:30<20:16:08, 3.31it/s] 35%|███▌ | 130083/371472 [10:20:30<20:01:34, 3.35it/s] 35%|███▌ | 130084/371472 [10:20:30<18:56:44, 3.54it/s] 35%|███▌ | 130085/371472 [10:20:30<19:50:40, 3.38it/s] 35%|███▌ | 130086/371472 [10:20:31<20:21:34, 3.29it/s] 35%|███▌ | 130087/371472 [10:20:31<21:19:30, 3.14it/s] 35%|███▌ | 130088/371472 [10:20:31<21:29:56, 3.12it/s] 35%|███▌ | 130089/371472 [10:20:32<21:00:47, 3.19it/s] 35%|███▌ | 130090/371472 [10:20:32<20:48:45, 3.22it/s] 35%|███▌ | 130091/371472 [10:20:32<19:44:04, 3.40it/s] 35%|███▌ | 130092/371472 [10:20:33<19:44:01, 3.40it/s] 35%|███▌ | 130093/371472 [10:20:33<19:28:03, 3.44it/s] 35%|███▌ | 130094/371472 [10:20:33<18:43:22, 3.58it/s] 35%|███▌ | 130095/371472 [10:20:33<18:02:16, 3.72it/s] 35%|███▌ | 130096/371472 [10:20:34<17:50:38, 3.76it/s] 35%|███▌ | 130097/371472 [10:20:34<18:06:19, 3.70it/s] 35%|███▌ | 130098/371472 [10:20:34<18:46:14, 3.57it/s] 35%|███▌ | 130099/371472 [10:20:34<18:59:19, 3.53it/s] 35%|███▌ | 130100/371472 [10:20:35<18:44:40, 3.58it/s] {'loss': 3.3313, 'learning_rate': 6.851095692645823e-07, 'epoch': 5.6} + 35%|███▌ | 130100/371472 [10:20:35<18:44:40, 3.58it/s] 35%|███▌ | 130101/371472 [10:20:35<19:26:33, 3.45it/s] 35%|███▌ | 130102/371472 [10:20:35<18:23:49, 3.64it/s] 35%|███▌ | 130103/371472 [10:20:36<18:01:01, 3.72it/s] 35%|███▌ | 130104/371472 [10:20:36<19:16:12, 3.48it/s] 35%|███▌ | 130105/371472 [10:20:36<18:36:37, 3.60it/s] 35%|███▌ | 130106/371472 [10:20:36<17:56:16, 3.74it/s] 35%|███▌ | 130107/371472 [10:20:37<17:52:09, 3.75it/s] 35%|███▌ | 130108/371472 [10:20:37<17:28:12, 3.84it/s] 35%|███▌ | 130109/371472 [10:20:37<17:09:46, 3.91it/s] 35%|███▌ | 130110/371472 [10:20:37<18:23:28, 3.65it/s] 35%|███▌ | 130111/371472 [10:20:38<17:26:41, 3.84it/s] 35%|███▌ | 130112/371472 [10:20:38<17:17:08, 3.88it/s] 35%|███▌ | 130113/371472 [10:20:38<17:21:55, 3.86it/s] 35%|███▌ | 130114/371472 [10:20:38<17:57:08, 3.73it/s] 35%|███▌ | 130115/371472 [10:20:39<20:09:30, 3.33it/s] 35%|███▌ | 130116/371472 [10:20:39<20:32:51, 3.26it/s] 35%|███▌ | 130117/371472 [10:20:40<23:08:56, 2.90it/s] 35%|███▌ | 130118/371472 [10:20:40<22:27:04, 2.99it/s] 35%|███▌ | 130119/371472 [10:20:40<21:21:39, 3.14it/s] 35%|███▌ | 130120/371472 [10:20:41<22:37:33, 2.96it/s] {'loss': 3.2221, 'learning_rate': 6.850610872891035e-07, 'epoch': 5.6} + 35%|███▌ | 130120/371472 [10:20:41<22:37:33, 2.96it/s] 35%|███▌ | 130121/371472 [10:20:41<21:19:09, 3.14it/s] 35%|███▌ | 130122/371472 [10:20:41<20:08:37, 3.33it/s] 35%|███▌ | 130123/371472 [10:20:41<20:47:46, 3.22it/s] 35%|███▌ | 130124/371472 [10:20:42<21:20:13, 3.14it/s] 35%|███▌ | 130125/371472 [10:20:42<20:54:43, 3.21it/s] 35%|███▌ | 130126/371472 [10:20:42<19:33:24, 3.43it/s] 35%|███▌ | 130127/371472 [10:20:43<18:33:51, 3.61it/s] 35%|███▌ | 130128/371472 [10:20:43<18:17:18, 3.67it/s] 35%|███▌ | 130129/371472 [10:20:43<18:06:19, 3.70it/s] 35%|███▌ | 130130/371472 [10:20:43<18:30:41, 3.62it/s] 35%|███▌ | 130131/371472 [10:20:44<18:14:40, 3.67it/s] 35%|███▌ | 130132/371472 [10:20:44<19:35:10, 3.42it/s] 35%|███▌ | 130133/371472 [10:20:44<20:01:06, 3.35it/s] 35%|███▌ | 130134/371472 [10:20:45<20:47:35, 3.22it/s] 35%|███▌ | 130135/371472 [10:20:45<20:19:44, 3.30it/s] 35%|███▌ | 130136/371472 [10:20:45<19:49:31, 3.38it/s] 35%|███▌ | 130137/371472 [10:20:46<20:42:38, 3.24it/s] 35%|███▌ | 130138/371472 [10:20:46<20:45:46, 3.23it/s] 35%|███▌ | 130139/371472 [10:20:46<20:59:50, 3.19it/s] 35%|███▌ | 130140/371472 [10:20:46<20:41:08, 3.24it/s] {'loss': 3.1508, 'learning_rate': 6.850126053136245e-07, 'epoch': 5.61} + 35%|███▌ | 130140/371472 [10:20:46<20:41:08, 3.24it/s] 35%|███▌ | 130141/371472 [10:20:47<19:56:28, 3.36it/s] 35%|███▌ | 130142/371472 [10:20:47<19:36:14, 3.42it/s] 35%|███▌ | 130143/371472 [10:20:47<19:51:15, 3.38it/s] 35%|███▌ | 130144/371472 [10:20:48<21:18:51, 3.15it/s] 35%|███▌ | 130145/371472 [10:20:48<20:30:00, 3.27it/s] 35%|███▌ | 130146/371472 [10:20:48<19:16:33, 3.48it/s] 35%|███▌ | 130147/371472 [10:20:48<19:16:13, 3.48it/s] 35%|███▌ | 130148/371472 [10:20:49<19:07:28, 3.51it/s] 35%|███▌ | 130149/371472 [10:20:49<19:28:30, 3.44it/s] 35%|███▌ | 130150/371472 [10:20:49<19:13:46, 3.49it/s] 35%|███▌ | 130151/371472 [10:20:50<19:34:46, 3.42it/s] 35%|███▌ | 130152/371472 [10:20:50<18:51:53, 3.55it/s] 35%|███▌ | 130153/371472 [10:20:50<18:41:12, 3.59it/s] 35%|███▌ | 130154/371472 [10:20:50<18:24:01, 3.64it/s] 35%|███▌ | 130155/371472 [10:20:51<18:37:00, 3.60it/s] 35%|███▌ | 130156/371472 [10:20:51<18:23:45, 3.64it/s] 35%|███▌ | 130157/371472 [10:20:51<19:05:19, 3.51it/s] 35%|███▌ | 130158/371472 [10:20:52<18:41:47, 3.59it/s] 35%|███▌ | 130159/371472 [10:20:52<18:46:46, 3.57it/s] 35%|███▌ | 130160/371472 [10:20:52<18:10:15, 3.69it/s] {'loss': 3.0391, 'learning_rate': 6.849641233381456e-07, 'epoch': 5.61} + 35%|███▌ | 130160/371472 [10:20:52<18:10:15, 3.69it/s] 35%|███▌ | 130161/371472 [10:20:52<17:27:49, 3.84it/s] 35%|███▌ | 130162/371472 [10:20:53<17:52:44, 3.75it/s] 35%|███▌ | 130163/371472 [10:20:53<17:24:02, 3.85it/s] 35%|███▌ | 130164/371472 [10:20:53<17:29:57, 3.83it/s] 35%|███▌ | 130165/371472 [10:20:53<17:29:39, 3.83it/s] 35%|███▌ | 130166/371472 [10:20:54<18:06:46, 3.70it/s] 35%|███▌ | 130167/371472 [10:20:54<18:28:24, 3.63it/s] 35%|███▌ | 130168/371472 [10:20:54<17:59:07, 3.73it/s] 35%|███▌ | 130169/371472 [10:20:55<18:43:02, 3.58it/s] 35%|███▌ | 130170/371472 [10:20:55<18:24:57, 3.64it/s] 35%|███▌ | 130171/371472 [10:20:55<18:10:30, 3.69it/s] 35%|███▌ | 130172/371472 [10:20:55<17:57:40, 3.73it/s] 35%|███▌ | 130173/371472 [10:20:56<18:43:46, 3.58it/s] 35%|███▌ | 130174/371472 [10:20:56<19:50:27, 3.38it/s] 35%|███▌ | 130175/371472 [10:20:56<19:11:50, 3.49it/s] 35%|███▌ | 130176/371472 [10:20:57<19:48:48, 3.38it/s] 35%|███▌ | 130177/371472 [10:20:57<19:24:18, 3.45it/s] 35%|███▌ | 130178/371472 [10:20:57<18:54:57, 3.54it/s] 35%|███▌ | 130179/371472 [10:20:57<18:19:34, 3.66it/s] 35%|███▌ | 130180/371472 [10:20:58<18:01:54, 3.72it/s] {'loss': 3.1049, 'learning_rate': 6.849156413626667e-07, 'epoch': 5.61} + 35%|███▌ | 130180/371472 [10:20:58<18:01:54, 3.72it/s] 35%|███▌ | 130181/371472 [10:20:58<17:44:43, 3.78it/s] 35%|███▌ | 130182/371472 [10:20:58<17:37:30, 3.80it/s] 35%|███▌ | 130183/371472 [10:20:58<17:50:28, 3.76it/s] 35%|███▌ | 130184/371472 [10:20:59<18:20:34, 3.65it/s] 35%|███▌ | 130185/371472 [10:20:59<18:22:53, 3.65it/s] 35%|███▌ | 130186/371472 [10:20:59<20:55:42, 3.20it/s] 35%|███▌ | 130187/371472 [10:21:00<21:17:29, 3.15it/s] 35%|███▌ | 130188/371472 [10:21:00<21:38:51, 3.10it/s] 35%|███▌ | 130189/371472 [10:21:00<20:19:32, 3.30it/s] 35%|███▌ | 130190/371472 [10:21:01<19:31:19, 3.43it/s] 35%|███▌ | 130191/371472 [10:21:01<18:25:53, 3.64it/s] 35%|███▌ | 130192/371472 [10:21:01<21:03:04, 3.18it/s] 35%|███▌ | 130193/371472 [10:21:01<20:52:53, 3.21it/s] 35%|███▌ | 130194/371472 [10:21:02<20:25:36, 3.28it/s] 35%|███▌ | 130195/371472 [10:21:02<19:21:22, 3.46it/s] 35%|███▌ | 130196/371472 [10:21:02<19:03:46, 3.52it/s] 35%|███▌ | 130197/371472 [10:21:03<18:49:56, 3.56it/s] 35%|███▌ | 130198/371472 [10:21:03<19:47:41, 3.39it/s] 35%|███▌ | 130199/371472 [10:21:03<18:43:34, 3.58it/s] 35%|███▌ | 130200/371472 [10:21:03<18:29:10, 3.63it/s] {'loss': 3.0774, 'learning_rate': 6.848671593871878e-07, 'epoch': 5.61} + 35%|███▌ | 130200/371472 [10:21:03<18:29:10, 3.63it/s] 35%|███▌ | 130201/371472 [10:21:04<19:15:23, 3.48it/s] 35%|███▌ | 130202/371472 [10:21:04<18:39:04, 3.59it/s] 35%|███▌ | 130203/371472 [10:21:04<17:59:34, 3.72it/s] 35%|███▌ | 130204/371472 [10:21:04<17:44:31, 3.78it/s] 35%|███▌ | 130205/371472 [10:21:05<19:11:00, 3.49it/s] 35%|███▌ | 130206/371472 [10:21:05<18:45:23, 3.57it/s] 35%|███▌ | 130207/371472 [10:21:05<18:55:10, 3.54it/s] 35%|███▌ | 130208/371472 [10:21:06<18:49:48, 3.56it/s] 35%|███▌ | 130209/371472 [10:21:06<18:58:54, 3.53it/s] 35%|███▌ | 130210/371472 [10:21:06<19:15:03, 3.48it/s] 35%|███▌ | 130211/371472 [10:21:06<19:00:57, 3.52it/s] 35%|███▌ | 130212/371472 [10:21:07<18:33:32, 3.61it/s] 35%|███▌ | 130213/371472 [10:21:07<18:01:37, 3.72it/s] 35%|███▌ | 130214/371472 [10:21:07<17:32:02, 3.82it/s] 35%|███▌ | 130215/371472 [10:21:08<17:50:15, 3.76it/s] 35%|███▌ | 130216/371472 [10:21:08<18:19:09, 3.66it/s] 35%|███▌ | 130217/371472 [10:21:08<18:43:53, 3.58it/s] 35%|███▌ | 130218/371472 [10:21:08<18:49:24, 3.56it/s] 35%|███▌ | 130219/371472 [10:21:09<18:03:17, 3.71it/s] 35%|███▌ | 130220/371472 [10:21:09<19:43:14, 3.40it/s] {'loss': 3.0843, 'learning_rate': 6.848186774117089e-07, 'epoch': 5.61} + 35%|███▌ | 130220/371472 [10:21:09<19:43:14, 3.40it/s] 35%|███▌ | 130221/371472 [10:21:09<19:13:51, 3.48it/s] 35%|███▌ | 130222/371472 [10:21:10<18:48:08, 3.56it/s] 35%|███▌ | 130223/371472 [10:21:10<18:23:20, 3.64it/s] 35%|███▌ | 130224/371472 [10:21:10<19:13:05, 3.49it/s] 35%|███▌ | 130225/371472 [10:21:10<19:20:54, 3.46it/s] 35%|███▌ | 130226/371472 [10:21:11<19:23:49, 3.45it/s] 35%|███▌ | 130227/371472 [10:21:11<19:04:29, 3.51it/s] 35%|███▌ | 130228/371472 [10:21:11<19:17:26, 3.47it/s] 35%|███▌ | 130229/371472 [10:21:12<20:10:18, 3.32it/s] 35%|███▌ | 130230/371472 [10:21:12<20:01:57, 3.35it/s] 35%|███▌ | 130231/371472 [10:21:12<20:21:53, 3.29it/s] 35%|███▌ | 130232/371472 [10:21:12<19:42:27, 3.40it/s] 35%|███▌ | 130233/371472 [10:21:13<18:56:49, 3.54it/s] 35%|███▌ | 130234/371472 [10:21:13<18:42:37, 3.58it/s] 35%|███▌ | 130235/371472 [10:21:13<18:12:07, 3.68it/s] 35%|███▌ | 130236/371472 [10:21:13<17:28:33, 3.83it/s] 35%|███▌ | 130237/371472 [10:21:14<17:30:32, 3.83it/s] 35%|███▌ | 130238/371472 [10:21:14<17:11:43, 3.90it/s] 35%|███▌ | 130239/371472 [10:21:14<17:44:22, 3.78it/s] 35%|███▌ | 130240/371472 [10:21:15<18:33:58, 3.61it/s] {'loss': 3.109, 'learning_rate': 6.847701954362299e-07, 'epoch': 5.61} + 35%|███▌ | 130240/371472 [10:21:15<18:33:58, 3.61it/s] 35%|███▌ | 130241/371472 [10:21:15<21:42:12, 3.09it/s] 35%|███▌ | 130242/371472 [10:21:15<21:36:11, 3.10it/s] 35%|███▌ | 130243/371472 [10:21:16<20:11:27, 3.32it/s] 35%|███▌ | 130244/371472 [10:21:16<19:25:38, 3.45it/s] 35%|███▌ | 130245/371472 [10:21:16<18:43:38, 3.58it/s] 35%|███▌ | 130246/371472 [10:21:16<18:33:43, 3.61it/s] 35%|███▌ | 130247/371472 [10:21:17<17:31:27, 3.82it/s] 35%|███▌ | 130248/371472 [10:21:17<17:29:50, 3.83it/s] 35%|███▌ | 130249/371472 [10:21:17<17:39:42, 3.79it/s] 35%|███▌ | 130250/371472 [10:21:17<17:22:52, 3.86it/s] 35%|███▌ | 130251/371472 [10:21:18<18:06:10, 3.70it/s] 35%|███▌ | 130252/371472 [10:21:18<18:25:54, 3.64it/s] 35%|███▌ | 130253/371472 [10:21:18<19:48:47, 3.38it/s] 35%|███▌ | 130254/371472 [10:21:19<19:35:44, 3.42it/s] 35%|███▌ | 130255/371472 [10:21:19<19:54:45, 3.36it/s] 35%|███▌ | 130256/371472 [10:21:19<20:02:25, 3.34it/s] 35%|███▌ | 130257/371472 [10:21:19<19:55:20, 3.36it/s] 35%|███▌ | 130258/371472 [10:21:20<19:54:48, 3.36it/s] 35%|███▌ | 130259/371472 [10:21:20<19:17:45, 3.47it/s] 35%|███▌ | 130260/371472 [10:21:20<19:06:40, 3.51it/s] {'loss': 3.2645, 'learning_rate': 6.847217134607512e-07, 'epoch': 5.61} + 35%|███▌ | 130260/371472 [10:21:20<19:06:40, 3.51it/s] 35%|███▌ | 130261/371472 [10:21:21<18:59:06, 3.53it/s] 35%|███▌ | 130262/371472 [10:21:21<19:35:08, 3.42it/s] 35%|███▌ | 130263/371472 [10:21:21<19:31:53, 3.43it/s] 35%|███▌ | 130264/371472 [10:21:22<20:23:42, 3.29it/s] 35%|███▌ | 130265/371472 [10:21:22<20:01:10, 3.35it/s] 35%|███▌ | 130266/371472 [10:21:22<19:25:14, 3.45it/s] 35%|███▌ | 130267/371472 [10:21:22<18:24:22, 3.64it/s] 35%|███▌ | 130268/371472 [10:21:23<19:45:52, 3.39it/s] 35%|███▌ | 130269/371472 [10:21:23<18:50:03, 3.56it/s] 35%|███▌ | 130270/371472 [10:21:23<18:52:09, 3.55it/s] 35%|███▌ | 130271/371472 [10:21:23<18:44:06, 3.58it/s] 35%|███▌ | 130272/371472 [10:21:24<18:19:45, 3.66it/s] 35%|███▌ | 130273/371472 [10:21:24<20:50:17, 3.22it/s] 35%|███▌ | 130274/371472 [10:21:24<19:49:22, 3.38it/s] 35%|███▌ | 130275/371472 [10:21:25<20:31:09, 3.27it/s] 35%|███▌ | 130276/371472 [10:21:25<19:21:14, 3.46it/s] 35%|███▌ | 130277/371472 [10:21:25<19:42:10, 3.40it/s] 35%|███▌ | 130278/371472 [10:21:26<19:24:30, 3.45it/s] 35%|███▌ | 130279/371472 [10:21:26<19:21:28, 3.46it/s] 35%|███▌ | 130280/371472 [10:21:26<18:54:00, 3.54it/s] {'loss': 3.2293, 'learning_rate': 6.846732314852723e-07, 'epoch': 5.61} + 35%|███▌ | 130280/371472 [10:21:26<18:54:00, 3.54it/s] 35%|███▌ | 130281/371472 [10:21:26<18:41:50, 3.58it/s] 35%|███▌ | 130282/371472 [10:21:27<20:06:57, 3.33it/s] 35%|███▌ | 130283/371472 [10:21:27<19:45:13, 3.39it/s] 35%|███▌ | 130284/371472 [10:21:27<18:42:07, 3.58it/s] 35%|███▌ | 130285/371472 [10:21:28<17:51:07, 3.75it/s] 35%|███▌ | 130286/371472 [10:21:28<17:59:00, 3.73it/s] 35%|███▌ | 130287/371472 [10:21:28<17:22:24, 3.86it/s] 35%|███▌ | 130288/371472 [10:21:28<17:36:47, 3.80it/s] 35%|███▌ | 130289/371472 [10:21:29<17:17:40, 3.87it/s] 35%|███▌ | 130290/371472 [10:21:29<17:04:20, 3.92it/s] 35%|███▌ | 130291/371472 [10:21:29<17:14:16, 3.89it/s] 35%|███▌ | 130292/371472 [10:21:29<17:25:57, 3.84it/s] 35%|███▌ | 130293/371472 [10:21:30<19:12:27, 3.49it/s] 35%|███▌ | 130294/371472 [10:21:30<18:43:11, 3.58it/s] 35%|███▌ | 130295/371472 [10:21:30<18:04:59, 3.70it/s] 35%|███▌ | 130296/371472 [10:21:31<20:59:32, 3.19it/s] 35%|███▌ | 130297/371472 [10:21:31<20:12:24, 3.32it/s] 35%|███▌ | 130298/371472 [10:21:31<19:10:55, 3.49it/s] 35%|███▌ | 130299/371472 [10:21:31<18:44:06, 3.58it/s] 35%|███▌ | 130300/371472 [10:21:32<18:47:44, 3.56it/s] {'loss': 3.2828, 'learning_rate': 6.846247495097932e-07, 'epoch': 5.61} + 35%|███▌ | 130300/371472 [10:21:32<18:47:44, 3.56it/s] 35%|███▌ | 130301/371472 [10:21:32<18:42:14, 3.58it/s] 35%|███▌ | 130302/371472 [10:21:32<19:32:42, 3.43it/s] 35%|███▌ | 130303/371472 [10:21:33<20:03:03, 3.34it/s] 35%|███▌ | 130304/371472 [10:21:33<18:41:55, 3.58it/s] 35%|███▌ | 130305/371472 [10:21:33<18:08:04, 3.69it/s] 35%|███▌ | 130306/371472 [10:21:33<18:25:31, 3.64it/s] 35%|███▌ | 130307/371472 [10:21:34<20:05:13, 3.33it/s] 35%|███▌ | 130308/371472 [10:21:34<19:30:38, 3.43it/s] 35%|███▌ | 130309/371472 [10:21:34<18:44:54, 3.57it/s] 35%|███▌ | 130310/371472 [10:21:34<18:24:33, 3.64it/s] 35%|███▌ | 130311/371472 [10:21:35<19:01:45, 3.52it/s] 35%|███▌ | 130312/371472 [10:21:35<19:08:58, 3.50it/s] 35%|███▌ | 130313/371472 [10:21:35<18:51:26, 3.55it/s] 35%|███▌ | 130314/371472 [10:21:36<18:38:13, 3.59it/s] 35%|███▌ | 130315/371472 [10:21:36<18:21:45, 3.65it/s] 35%|███▌ | 130316/371472 [10:21:36<18:38:46, 3.59it/s] 35%|███▌ | 130317/371472 [10:21:36<18:15:16, 3.67it/s] 35%|███▌ | 130318/371472 [10:21:37<17:34:05, 3.81it/s] 35%|███▌ | 130319/371472 [10:21:37<17:12:47, 3.89it/s] 35%|███▌ | 130320/371472 [10:21:37<16:54:40, 3.96it/s] {'loss': 3.0916, 'learning_rate': 6.845762675343144e-07, 'epoch': 5.61} + 35%|███▌ | 130320/371472 [10:21:37<16:54:40, 3.96it/s] 35%|███▌ | 130321/371472 [10:21:37<18:08:08, 3.69it/s] 35%|███▌ | 130322/371472 [10:21:38<18:27:54, 3.63it/s] 35%|███▌ | 130323/371472 [10:21:38<18:49:59, 3.56it/s] 35%|███▌ | 130324/371472 [10:21:38<18:29:04, 3.62it/s] 35%|███▌ | 130325/371472 [10:21:39<19:03:13, 3.52it/s] 35%|███▌ | 130326/371472 [10:21:39<18:49:25, 3.56it/s] 35%|███▌ | 130327/371472 [10:21:39<20:07:45, 3.33it/s] 35%|███▌ | 130328/371472 [10:21:40<20:41:33, 3.24it/s] 35%|███▌ | 130329/371472 [10:21:40<19:21:35, 3.46it/s] 35%|███▌ | 130330/371472 [10:21:40<19:21:20, 3.46it/s] 35%|███▌ | 130331/371472 [10:21:40<19:09:12, 3.50it/s] 35%|███▌ | 130332/371472 [10:21:41<19:44:04, 3.39it/s] 35%|███▌ | 130333/371472 [10:21:41<19:12:09, 3.49it/s] 35%|███▌ | 130334/371472 [10:21:41<19:39:49, 3.41it/s] 35%|███▌ | 130335/371472 [10:21:42<19:11:50, 3.49it/s] 35%|███▌ | 130336/371472 [10:21:42<20:46:26, 3.22it/s] 35%|███▌ | 130337/371472 [10:21:42<21:39:15, 3.09it/s] 35%|███▌ | 130338/371472 [10:21:43<20:41:51, 3.24it/s] 35%|███▌ | 130339/371472 [10:21:43<19:57:47, 3.36it/s] 35%|███▌ | 130340/371472 [10:21:43<19:30:43, 3.43it/s] {'loss': 3.2169, 'learning_rate': 6.845277855588357e-07, 'epoch': 5.61} + 35%|███▌ | 130340/371472 [10:21:43<19:30:43, 3.43it/s] 35%|███▌ | 130341/371472 [10:21:43<19:38:10, 3.41it/s] 35%|███▌ | 130342/371472 [10:21:44<19:43:56, 3.39it/s] 35%|███▌ | 130343/371472 [10:21:44<19:29:49, 3.44it/s] 35%|███▌ | 130344/371472 [10:21:44<18:53:36, 3.55it/s] 35%|███▌ | 130345/371472 [10:21:45<19:16:50, 3.47it/s] 35%|███▌ | 130346/371472 [10:21:45<18:47:30, 3.56it/s] 35%|███▌ | 130347/371472 [10:21:45<18:19:31, 3.65it/s] 35%|███▌ | 130348/371472 [10:21:45<18:29:24, 3.62it/s] 35%|███▌ | 130349/371472 [10:21:46<17:41:35, 3.79it/s] 35%|███▌ | 130350/371472 [10:21:46<17:35:11, 3.81it/s] 35%|███▌ | 130351/371472 [10:21:46<17:04:57, 3.92it/s] 35%|███▌ | 130352/371472 [10:21:46<18:43:00, 3.58it/s] 35%|███▌ | 130353/371472 [10:21:47<18:37:39, 3.60it/s] 35%|███▌ | 130354/371472 [10:21:47<19:07:16, 3.50it/s] 35%|███▌ | 130355/371472 [10:21:47<19:07:37, 3.50it/s] 35%|███▌ | 130356/371472 [10:21:48<18:48:11, 3.56it/s] 35%|███▌ | 130357/371472 [10:21:48<18:49:10, 3.56it/s] 35%|███▌ | 130358/371472 [10:21:48<18:20:52, 3.65it/s] 35%|███▌ | 130359/371472 [10:21:48<18:18:29, 3.66it/s] 35%|███▌ | 130360/371472 [10:21:49<17:53:45, 3.74it/s] {'loss': 3.2115, 'learning_rate': 6.844793035833566e-07, 'epoch': 5.61} + 35%|███▌ | 130360/371472 [10:21:49<17:53:45, 3.74it/s] 35%|███▌ | 130361/371472 [10:21:49<17:33:56, 3.81it/s] 35%|███▌ | 130362/371472 [10:21:49<18:07:27, 3.70it/s] 35%|███▌ | 130363/371472 [10:21:50<20:08:20, 3.33it/s] 35%|███▌ | 130364/371472 [10:21:50<21:21:38, 3.14it/s] 35%|███▌ | 130365/371472 [10:21:50<20:17:42, 3.30it/s] 35%|███▌ | 130366/371472 [10:21:50<19:28:17, 3.44it/s] 35%|███▌ | 130367/371472 [10:21:51<19:09:44, 3.50it/s] 35%|███▌ | 130368/371472 [10:21:51<18:51:59, 3.55it/s] 35%|███▌ | 130369/371472 [10:21:51<19:11:55, 3.49it/s] 35%|███▌ | 130370/371472 [10:21:51<18:26:51, 3.63it/s] 35%|███▌ | 130371/371472 [10:21:52<20:12:25, 3.31it/s] 35%|███▌ | 130372/371472 [10:21:52<21:16:28, 3.15it/s] 35%|███▌ | 130373/371472 [10:21:53<20:38:04, 3.25it/s] 35%|███▌ | 130374/371472 [10:21:53<19:48:57, 3.38it/s] 35%|███▌ | 130375/371472 [10:21:53<20:23:06, 3.29it/s] 35%|███▌ | 130376/371472 [10:21:53<21:55:17, 3.06it/s] 35%|███▌ | 130377/371472 [10:21:54<21:07:31, 3.17it/s] 35%|███▌ | 130378/371472 [10:21:54<20:12:02, 3.32it/s] 35%|███▌ | 130379/371472 [10:21:54<19:45:59, 3.39it/s] 35%|███▌ | 130380/371472 [10:21:55<20:26:21, 3.28it/s] {'loss': 2.9956, 'learning_rate': 6.844308216078777e-07, 'epoch': 5.62} + 35%|███▌ | 130380/371472 [10:21:55<20:26:21, 3.28it/s] 35%|███▌ | 130381/371472 [10:21:55<19:58:12, 3.35it/s] 35%|███▌ | 130382/371472 [10:21:55<20:14:14, 3.31it/s] 35%|███▌ | 130383/371472 [10:21:56<20:48:13, 3.22it/s] 35%|███▌ | 130384/371472 [10:21:56<20:27:54, 3.27it/s] 35%|███▌ | 130385/371472 [10:21:56<20:08:51, 3.32it/s] 35%|███▌ | 130386/371472 [10:21:56<19:36:07, 3.42it/s] 35%|███▌ | 130387/371472 [10:21:57<19:03:18, 3.51it/s] 35%|███▌ | 130388/371472 [10:21:57<18:49:13, 3.56it/s] 35%|███▌ | 130389/371472 [10:21:57<18:54:45, 3.54it/s] 35%|███▌ | 130390/371472 [10:21:58<18:43:02, 3.58it/s] 35%|███▌ | 130391/371472 [10:21:58<18:47:32, 3.56it/s] 35%|███▌ | 130392/371472 [10:21:58<19:05:46, 3.51it/s] 35%|███▌ | 130393/371472 [10:21:58<18:38:49, 3.59it/s] 35%|███▌ | 130394/371472 [10:21:59<18:06:59, 3.70it/s] 35%|███▌ | 130395/371472 [10:21:59<17:51:20, 3.75it/s] 35%|███▌ | 130396/371472 [10:21:59<17:35:04, 3.81it/s] 35%|███▌ | 130397/371472 [10:21:59<17:05:07, 3.92it/s] 35%|███▌ | 130398/371472 [10:22:00<17:47:35, 3.76it/s] 35%|███▌ | 130399/371472 [10:22:00<18:26:56, 3.63it/s] 35%|███▌ | 130400/371472 [10:22:00<18:13:11, 3.68it/s] {'loss': 3.2077, 'learning_rate': 6.843823396323989e-07, 'epoch': 5.62} + 35%|███▌ | 130400/371472 [10:22:00<18:13:11, 3.68it/s] 35%|███▌ | 130401/371472 [10:22:01<18:50:20, 3.55it/s] 35%|███▌ | 130402/371472 [10:22:01<18:42:02, 3.58it/s] 35%|███▌ | 130403/371472 [10:22:01<18:01:17, 3.72it/s] 35%|███▌ | 130404/371472 [10:22:01<18:26:07, 3.63it/s] 35%|███▌ | 130405/371472 [10:22:02<18:45:20, 3.57it/s] 35%|███▌ | 130406/371472 [10:22:02<18:50:15, 3.55it/s] 35%|███▌ | 130407/371472 [10:22:02<18:33:18, 3.61it/s] 35%|███▌ | 130408/371472 [10:22:02<18:12:25, 3.68it/s] 35%|███▌ | 130409/371472 [10:22:03<19:32:58, 3.43it/s] 35%|███▌ | 130410/371472 [10:22:03<18:37:32, 3.60it/s] 35%|███▌ | 130411/371472 [10:22:03<17:58:03, 3.73it/s] 35%|███▌ | 130412/371472 [10:22:04<18:30:30, 3.62it/s] 35%|███▌ | 130413/371472 [10:22:04<19:00:18, 3.52it/s] 35%|███▌ | 130414/371472 [10:22:04<18:56:49, 3.53it/s] 35%|███▌ | 130415/371472 [10:22:04<18:59:06, 3.53it/s] 35%|███▌ | 130416/371472 [10:22:05<19:03:42, 3.51it/s] 35%|███▌ | 130417/371472 [10:22:05<18:37:29, 3.60it/s] 35%|███▌ | 130418/371472 [10:22:05<19:20:54, 3.46it/s] 35%|███▌ | 130419/371472 [10:22:06<19:23:58, 3.45it/s] 35%|███▌ | 130420/371472 [10:22:06<18:17:30, 3.66it/s] {'loss': 3.1998, 'learning_rate': 6.8433385765692e-07, 'epoch': 5.62} + 35%|███▌ | 130420/371472 [10:22:06<18:17:30, 3.66it/s] 35%|███▌ | 130421/371472 [10:22:06<18:20:56, 3.65it/s] 35%|███▌ | 130422/371472 [10:22:06<17:48:19, 3.76it/s] 35%|███▌ | 130423/371472 [10:22:07<18:44:18, 3.57it/s] 35%|███▌ | 130424/371472 [10:22:07<18:26:44, 3.63it/s] 35%|███▌ | 130425/371472 [10:22:07<18:39:45, 3.59it/s] 35%|███▌ | 130426/371472 [10:22:07<18:22:49, 3.64it/s] 35%|███▌ | 130427/371472 [10:22:08<20:29:00, 3.27it/s] 35%|███▌ | 130428/371472 [10:22:08<19:26:39, 3.44it/s] 35%|███▌ | 130429/371472 [10:22:08<20:09:48, 3.32it/s] 35%|███▌ | 130430/371472 [10:22:09<19:22:37, 3.46it/s] 35%|███▌ | 130431/371472 [10:22:09<19:34:07, 3.42it/s] 35%|███▌ | 130432/371472 [10:22:09<18:57:57, 3.53it/s] 35%|███▌ | 130433/371472 [10:22:10<18:27:14, 3.63it/s] 35%|███▌ | 130434/371472 [10:22:10<18:08:48, 3.69it/s] 35%|███▌ | 130435/371472 [10:22:10<18:03:34, 3.71it/s] 35%|███▌ | 130436/371472 [10:22:10<18:32:21, 3.61it/s] 35%|███▌ | 130437/371472 [10:22:11<18:42:59, 3.58it/s] 35%|███▌ | 130438/371472 [10:22:11<18:31:07, 3.62it/s] 35%|███▌ | 130439/371472 [10:22:11<18:43:04, 3.58it/s] 35%|███▌ | 130440/371472 [10:22:11<17:58:07, 3.73it/s] {'loss': 3.2389, 'learning_rate': 6.84285375681441e-07, 'epoch': 5.62} + 35%|███▌ | 130440/371472 [10:22:11<17:58:07, 3.73it/s] 35%|███▌ | 130441/371472 [10:22:12<18:20:55, 3.65it/s] 35%|███▌ | 130442/371472 [10:22:12<18:33:01, 3.61it/s] 35%|███▌ | 130443/371472 [10:22:12<18:57:12, 3.53it/s] 35%|███▌ | 130444/371472 [10:22:13<19:09:05, 3.50it/s] 35%|███▌ | 130445/371472 [10:22:13<18:33:05, 3.61it/s] 35%|███▌ | 130446/371472 [10:22:13<17:45:45, 3.77it/s] 35%|███▌ | 130447/371472 [10:22:13<18:08:05, 3.69it/s] 35%|███▌ | 130448/371472 [10:22:14<18:09:00, 3.69it/s] 35%|███▌ | 130449/371472 [10:22:14<18:53:47, 3.54it/s] 35%|███▌ | 130450/371472 [10:22:14<19:22:44, 3.45it/s] 35%|███▌ | 130451/371472 [10:22:15<19:24:21, 3.45it/s] 35%|███▌ | 130452/371472 [10:22:15<19:09:39, 3.49it/s] 35%|███▌ | 130453/371472 [10:22:15<19:11:00, 3.49it/s] 35%|███▌ | 130454/371472 [10:22:15<19:01:29, 3.52it/s] 35%|███▌ | 130455/371472 [10:22:16<19:00:46, 3.52it/s] 35%|███▌ | 130456/371472 [10:22:16<20:14:51, 3.31it/s] 35%|███▌ | 130457/371472 [10:22:16<20:19:44, 3.29it/s] 35%|███▌ | 130458/371472 [10:22:17<20:29:40, 3.27it/s] 35%|███▌ | 130459/371472 [10:22:17<19:47:52, 3.38it/s] 35%|███▌ | 130460/371472 [10:22:17<18:38:24, 3.59it/s] {'loss': 3.1101, 'learning_rate': 6.842368937059621e-07, 'epoch': 5.62} + 35%|███▌ | 130460/371472 [10:22:17<18:38:24, 3.59it/s] 35%|███▌ | 130461/371472 [10:22:17<17:59:33, 3.72it/s] 35%|███▌ | 130462/371472 [10:22:18<18:42:25, 3.58it/s] 35%|███▌ | 130463/371472 [10:22:18<18:32:01, 3.61it/s] 35%|███▌ | 130464/371472 [10:22:18<18:42:43, 3.58it/s] 35%|███▌ | 130465/371472 [10:22:19<19:42:24, 3.40it/s] 35%|███▌ | 130466/371472 [10:22:19<19:00:17, 3.52it/s] 35%|███▌ | 130467/371472 [10:22:19<18:35:33, 3.60it/s] 35%|███▌ | 130468/371472 [10:22:19<17:40:54, 3.79it/s] 35%|███▌ | 130469/371472 [10:22:20<17:52:48, 3.74it/s] 35%|███▌ | 130470/371472 [10:22:20<18:17:58, 3.66it/s] 35%|███▌ | 130471/371472 [10:22:20<18:30:50, 3.62it/s] 35%|███▌ | 130472/371472 [10:22:20<18:01:48, 3.71it/s] 35%|███▌ | 130473/371472 [10:22:21<18:12:36, 3.68it/s] 35%|███▌ | 130474/371472 [10:22:21<17:42:46, 3.78it/s] 35%|███▌ | 130475/371472 [10:22:21<17:16:02, 3.88it/s] 35%|███▌ | 130476/371472 [10:22:21<17:39:00, 3.79it/s] 35%|███▌ | 130477/371472 [10:22:22<17:14:40, 3.88it/s] 35%|███▌ | 130478/371472 [10:22:22<17:00:15, 3.94it/s] 35%|███▌ | 130479/371472 [10:22:22<16:36:15, 4.03it/s] 35%|███▌ | 130480/371472 [10:22:22<16:26:10, 4.07it/s] {'loss': 3.2012, 'learning_rate': 6.841884117304833e-07, 'epoch': 5.62} + 35%|███▌ | 130480/371472 [10:22:22<16:26:10, 4.07it/s] 35%|███▌ | 130481/371472 [10:22:23<16:34:35, 4.04it/s] 35%|███▌ | 130482/371472 [10:22:23<16:42:19, 4.01it/s] 35%|███▌ | 130483/371472 [10:22:23<17:15:09, 3.88it/s] 35%|███▌ | 130484/371472 [10:22:23<17:26:23, 3.84it/s] 35%|███▌ | 130485/371472 [10:22:24<17:24:53, 3.84it/s] 35%|███▌ | 130486/371472 [10:22:24<18:07:21, 3.69it/s] 35%|███▌ | 130487/371472 [10:22:24<18:16:58, 3.66it/s] 35%|███▌ | 130488/371472 [10:22:25<18:17:09, 3.66it/s] 35%|███▌ | 130489/371472 [10:22:25<18:00:04, 3.72it/s] 35%|███▌ | 130490/371472 [10:22:25<18:20:44, 3.65it/s] 35%|███▌ | 130491/371472 [10:22:25<17:55:34, 3.73it/s] 35%|███▌ | 130492/371472 [10:22:26<18:26:34, 3.63it/s] 35%|███▌ | 130493/371472 [10:22:26<18:23:55, 3.64it/s] 35%|███▌ | 130494/371472 [10:22:26<17:46:06, 3.77it/s] 35%|███▌ | 130495/371472 [10:22:27<19:10:45, 3.49it/s] 35%|███▌ | 130496/371472 [10:22:27<19:07:39, 3.50it/s] 35%|███▌ | 130497/371472 [10:22:27<20:26:22, 3.27it/s] 35%|███▌ | 130498/371472 [10:22:27<19:28:17, 3.44it/s] 35%|███▌ | 130499/371472 [10:22:28<19:37:34, 3.41it/s] 35%|███▌ | 130500/371472 [10:22:28<18:41:06, 3.58it/s] {'loss': 3.2541, 'learning_rate': 6.841399297550044e-07, 'epoch': 5.62} + 35%|███▌ | 130500/371472 [10:22:28<18:41:06, 3.58it/s] 35%|███▌ | 130501/371472 [10:22:28<18:58:32, 3.53it/s] 35%|███▌ | 130502/371472 [10:22:29<18:47:41, 3.56it/s] 35%|███▌ | 130503/371472 [10:22:29<20:07:13, 3.33it/s] 35%|███▌ | 130504/371472 [10:22:29<19:50:19, 3.37it/s] 35%|███▌ | 130505/371472 [10:22:29<20:05:15, 3.33it/s] 35%|███▌ | 130506/371472 [10:22:30<19:16:31, 3.47it/s] 35%|███▌ | 130507/371472 [10:22:30<18:33:10, 3.61it/s] 35%|███▌ | 130508/371472 [10:22:30<19:18:56, 3.47it/s] 35%|███▌ | 130509/371472 [10:22:31<19:27:55, 3.44it/s] 35%|███▌ | 130510/371472 [10:22:31<18:31:00, 3.61it/s] 35%|███▌ | 130511/371472 [10:22:31<17:54:47, 3.74it/s] 35%|███▌ | 130512/371472 [10:22:31<17:40:27, 3.79it/s] 35%|███▌ | 130513/371472 [10:22:32<17:10:58, 3.90it/s] 35%|███▌ | 130514/371472 [10:22:32<17:06:09, 3.91it/s] 35%|███▌ | 130515/371472 [10:22:32<17:16:49, 3.87it/s] 35%|███▌ | 130516/371472 [10:22:32<16:54:33, 3.96it/s] 35%|███▌ | 130517/371472 [10:22:33<16:49:29, 3.98it/s] 35%|███▌ | 130518/371472 [10:22:33<18:52:49, 3.55it/s] 35%|███▌ | 130519/371472 [10:22:33<19:06:18, 3.50it/s] 35%|███▌ | 130520/371472 [10:22:34<18:53:05, 3.54it/s] {'loss': 3.1097, 'learning_rate': 6.840914477795255e-07, 'epoch': 5.62} + 35%|███▌ | 130520/371472 [10:22:34<18:53:05, 3.54it/s] 35%|███▌ | 130521/371472 [10:22:34<18:15:07, 3.67it/s] 35%|███▌ | 130522/371472 [10:22:34<18:34:16, 3.60it/s] 35%|███▌ | 130523/371472 [10:22:34<19:32:12, 3.43it/s] 35%|███▌ | 130524/371472 [10:22:35<19:02:15, 3.52it/s] 35%|███▌ | 130525/371472 [10:22:35<18:53:46, 3.54it/s] 35%|███▌ | 130526/371472 [10:22:35<18:43:57, 3.57it/s] 35%|███▌ | 130527/371472 [10:22:35<18:21:29, 3.65it/s] 35%|███▌ | 130528/371472 [10:22:36<18:27:33, 3.63it/s] 35%|███▌ | 130529/371472 [10:22:36<18:00:18, 3.72it/s] 35%|███▌ | 130530/371472 [10:22:36<18:23:30, 3.64it/s] 35%|███▌ | 130531/371472 [10:22:37<19:53:07, 3.37it/s] 35%|███▌ | 130532/371472 [10:22:37<21:05:48, 3.17it/s] 35%|███▌ | 130533/371472 [10:22:37<21:03:48, 3.18it/s] 35%|███▌ | 130534/371472 [10:22:38<19:56:05, 3.36it/s] 35%|███▌ | 130535/371472 [10:22:38<20:10:37, 3.32it/s] 35%|███▌ | 130536/371472 [10:22:38<21:23:04, 3.13it/s] 35%|███▌ | 130537/371472 [10:22:38<20:26:18, 3.27it/s] 35%|███▌ | 130538/371472 [10:22:39<24:57:34, 2.68it/s] 35%|███▌ | 130539/371472 [10:22:39<23:19:58, 2.87it/s] 35%|███▌ | 130540/371472 [10:22:40<21:12:59, 3.15it/s] {'loss': 3.0298, 'learning_rate': 6.840429658040466e-07, 'epoch': 5.62} + 35%|███▌ | 130540/371472 [10:22:40<21:12:59, 3.15it/s] 35%|███▌ | 130541/371472 [10:22:40<21:53:25, 3.06it/s] 35%|███▌ | 130542/371472 [10:22:40<21:06:26, 3.17it/s] 35%|███▌ | 130543/371472 [10:22:40<20:48:55, 3.22it/s] 35%|███▌ | 130544/371472 [10:22:41<20:49:00, 3.21it/s] 35%|███▌ | 130545/371472 [10:22:41<20:15:03, 3.30it/s] 35%|███▌ | 130546/371472 [10:22:41<18:59:03, 3.53it/s] 35%|███▌ | 130547/371472 [10:22:42<18:22:06, 3.64it/s] 35%|███▌ | 130548/371472 [10:22:42<19:44:25, 3.39it/s] 35%|███▌ | 130549/371472 [10:22:42<19:33:31, 3.42it/s] 35%|███▌ | 130550/371472 [10:22:42<18:42:30, 3.58it/s] 35%|███▌ | 130551/371472 [10:22:43<18:19:48, 3.65it/s] 35%|███▌ | 130552/371472 [10:22:43<18:41:23, 3.58it/s] 35%|███▌ | 130553/371472 [10:22:43<18:44:42, 3.57it/s] 35%|███▌ | 130554/371472 [10:22:44<17:49:12, 3.76it/s] 35%|███▌ | 130555/371472 [10:22:44<18:28:20, 3.62it/s] 35%|███▌ | 130556/371472 [10:22:44<19:20:23, 3.46it/s] 35%|███▌ | 130557/371472 [10:22:44<18:48:27, 3.56it/s] 35%|███▌ | 130558/371472 [10:22:45<18:58:47, 3.53it/s] 35%|███▌ | 130559/371472 [10:22:45<18:07:29, 3.69it/s] 35%|███▌ | 130560/371472 [10:22:45<18:18:41, 3.65it/s] {'loss': 3.2764, 'learning_rate': 6.839944838285677e-07, 'epoch': 5.62} + 35%|███▌ | 130560/371472 [10:22:45<18:18:41, 3.65it/s] 35%|███▌ | 130561/371472 [10:22:46<19:09:43, 3.49it/s] 35%|███▌ | 130562/371472 [10:22:46<18:59:02, 3.53it/s] 35%|███▌ | 130563/371472 [10:22:46<18:14:55, 3.67it/s] 35%|███▌ | 130564/371472 [10:22:46<17:55:14, 3.73it/s] 35%|███▌ | 130565/371472 [10:22:47<17:45:10, 3.77it/s] 35%|███▌ | 130566/371472 [10:22:47<17:44:16, 3.77it/s] 35%|███▌ | 130567/371472 [10:22:47<17:41:41, 3.78it/s] 35%|███▌ | 130568/371472 [10:22:47<17:09:57, 3.90it/s] 35%|███▌ | 130569/371472 [10:22:48<18:15:08, 3.67it/s] 35%|███▌ | 130570/371472 [10:22:48<17:36:39, 3.80it/s] 35%|███▌ | 130571/371472 [10:22:48<17:39:30, 3.79it/s] 35%|███▌ | 130572/371472 [10:22:49<19:13:42, 3.48it/s] 35%|███▌ | 130573/371472 [10:22:49<20:28:18, 3.27it/s] 35%|███▌ | 130574/371472 [10:22:49<19:53:14, 3.36it/s] 35%|███▌ | 130575/371472 [10:22:49<18:31:27, 3.61it/s] 35%|███▌ | 130576/371472 [10:22:50<18:20:49, 3.65it/s] 35%|███▌ | 130577/371472 [10:22:50<19:26:40, 3.44it/s] 35%|███▌ | 130578/371472 [10:22:50<19:06:58, 3.50it/s] 35%|███▌ | 130579/371472 [10:22:50<18:10:54, 3.68it/s] 35%|███▌ | 130580/371472 [10:22:51<17:52:01, 3.75it/s] {'loss': 3.1988, 'learning_rate': 6.839460018530888e-07, 'epoch': 5.62} + 35%|███▌ | 130580/371472 [10:22:51<17:52:01, 3.75it/s] 35%|███▌ | 130581/371472 [10:22:51<17:59:10, 3.72it/s] 35%|███▌ | 130582/371472 [10:22:51<17:59:37, 3.72it/s] 35%|███▌ | 130583/371472 [10:22:52<17:30:37, 3.82it/s] 35%|███▌ | 130584/371472 [10:22:52<18:50:18, 3.55it/s] 35%|███▌ | 130585/371472 [10:22:52<18:23:34, 3.64it/s] 35%|███▌ | 130586/371472 [10:22:52<18:37:07, 3.59it/s] 35%|███▌ | 130587/371472 [10:22:53<18:41:50, 3.58it/s] 35%|███▌ | 130588/371472 [10:22:53<18:56:50, 3.53it/s] 35%|███▌ | 130589/371472 [10:22:53<19:19:38, 3.46it/s] 35%|███▌ | 130590/371472 [10:22:54<18:43:27, 3.57it/s] 35%|███▌ | 130591/371472 [10:22:54<18:45:39, 3.57it/s] 35%|███▌ | 130592/371472 [10:22:54<18:57:38, 3.53it/s] 35%|███▌ | 130593/371472 [10:22:54<18:20:24, 3.65it/s] 35%|███▌ | 130594/371472 [10:22:55<21:16:42, 3.14it/s] 35%|███▌ | 130595/371472 [10:22:55<20:15:10, 3.30it/s] 35%|███▌ | 130596/371472 [10:22:55<20:23:34, 3.28it/s] 35%|███▌ | 130597/371472 [10:22:56<21:39:04, 3.09it/s] 35%|███▌ | 130598/371472 [10:22:56<20:41:52, 3.23it/s] 35%|███▌ | 130599/371472 [10:22:56<20:14:14, 3.31it/s] 35%|███▌ | 130600/371472 [10:22:57<19:06:37, 3.50it/s] {'loss': 2.9662, 'learning_rate': 6.838975198776099e-07, 'epoch': 5.63} + 35%|███▌ | 130600/371472 [10:22:57<19:06:37, 3.50it/s] 35%|███▌ | 130601/371472 [10:22:57<18:35:22, 3.60it/s] 35%|███▌ | 130602/371472 [10:22:57<18:24:12, 3.64it/s] 35%|███▌ | 130603/371472 [10:22:57<18:18:29, 3.65it/s] 35%|███▌ | 130604/371472 [10:22:58<18:28:20, 3.62it/s] 35%|███▌ | 130605/371472 [10:22:58<18:54:11, 3.54it/s] 35%|███▌ | 130606/371472 [10:22:58<19:24:18, 3.45it/s] 35%|███▌ | 130607/371472 [10:22:58<18:50:31, 3.55it/s] 35%|███▌ | 130608/371472 [10:22:59<18:34:46, 3.60it/s] 35%|███▌ | 130609/371472 [10:22:59<18:11:04, 3.68it/s] 35%|███▌ | 130610/371472 [10:22:59<19:23:25, 3.45it/s] 35%|███▌ | 130611/371472 [10:23:00<18:51:44, 3.55it/s] 35%|███▌ | 130612/371472 [10:23:00<18:39:16, 3.59it/s] 35%|███▌ | 130613/371472 [10:23:00<18:10:12, 3.68it/s] 35%|███▌ | 130614/371472 [10:23:00<17:45:11, 3.77it/s] 35%|███▌ | 130615/371472 [10:23:01<17:50:43, 3.75it/s] 35%|███▌ | 130616/371472 [10:23:01<17:40:44, 3.78it/s] 35%|███▌ | 130617/371472 [10:23:01<18:30:20, 3.62it/s] 35%|███▌ | 130618/371472 [10:23:01<18:46:12, 3.56it/s] 35%|███▌ | 130619/371472 [10:23:02<18:17:52, 3.66it/s] 35%|███▌ | 130620/371472 [10:23:02<18:47:38, 3.56it/s] {'loss': 3.1406, 'learning_rate': 6.83849037902131e-07, 'epoch': 5.63} + 35%|███▌ | 130620/371472 [10:23:02<18:47:38, 3.56it/s] 35%|███▌ | 130621/371472 [10:23:02<18:17:00, 3.66it/s] 35%|███▌ | 130622/371472 [10:23:03<18:22:54, 3.64it/s] 35%|███▌ | 130623/371472 [10:23:03<19:34:03, 3.42it/s] 35%|███▌ | 130624/371472 [10:23:03<19:28:32, 3.44it/s] 35%|███▌ | 130625/371472 [10:23:04<19:54:31, 3.36it/s] 35%|███▌ | 130626/371472 [10:23:04<19:18:35, 3.46it/s] 35%|███▌ | 130627/371472 [10:23:04<19:34:52, 3.42it/s] 35%|███▌ | 130628/371472 [10:23:04<19:39:10, 3.40it/s] 35%|███▌ | 130629/371472 [10:23:05<21:04:21, 3.17it/s] 35%|███▌ | 130630/371472 [10:23:05<19:58:34, 3.35it/s] 35%|███▌ | 130631/371472 [10:23:05<19:13:18, 3.48it/s] 35%|███▌ | 130632/371472 [10:23:06<18:52:05, 3.55it/s] 35%|███▌ | 130633/371472 [10:23:06<19:36:34, 3.41it/s] 35%|███▌ | 130634/371472 [10:23:06<19:13:49, 3.48it/s] 35%|███▌ | 130635/371472 [10:23:06<18:59:01, 3.52it/s] 35%|███▌ | 130636/371472 [10:23:07<18:32:13, 3.61it/s] 35%|███▌ | 130637/371472 [10:23:07<18:43:38, 3.57it/s] 35%|███▌ | 130638/371472 [10:23:07<18:20:04, 3.65it/s] 35%|███▌ | 130639/371472 [10:23:07<18:24:22, 3.63it/s] 35%|███▌ | 130640/371472 [10:23:08<18:35:55, 3.60it/s] {'loss': 3.1734, 'learning_rate': 6.838005559266522e-07, 'epoch': 5.63} + 35%|███▌ | 130640/371472 [10:23:08<18:35:55, 3.60it/s] 35%|███▌ | 130641/371472 [10:23:08<18:18:02, 3.66it/s] 35%|███▌ | 130642/371472 [10:23:08<18:54:21, 3.54it/s] 35%|███▌ | 130643/371472 [10:23:09<18:38:39, 3.59it/s] 35%|███▌ | 130644/371472 [10:23:09<18:31:52, 3.61it/s] 35%|███▌ | 130645/371472 [10:23:09<17:43:58, 3.77it/s] 35%|███▌ | 130646/371472 [10:23:09<17:21:55, 3.85it/s] 35%|███▌ | 130647/371472 [10:23:10<17:41:03, 3.78it/s] 35%|███▌ | 130648/371472 [10:23:10<18:37:09, 3.59it/s] 35%|███▌ | 130649/371472 [10:23:10<19:43:16, 3.39it/s] 35%|███▌ | 130650/371472 [10:23:11<19:04:11, 3.51it/s] 35%|███▌ | 130651/371472 [10:23:11<19:53:05, 3.36it/s] 35%|███▌ | 130652/371472 [10:23:11<18:49:11, 3.55it/s] 35%|███▌ | 130653/371472 [10:23:11<18:52:19, 3.54it/s] 35%|███▌ | 130654/371472 [10:23:12<18:17:37, 3.66it/s] 35%|███▌ | 130655/371472 [10:23:12<18:10:22, 3.68it/s] 35%|███▌ | 130656/371472 [10:23:12<18:57:03, 3.53it/s] 35%|███▌ | 130657/371472 [10:23:12<18:14:52, 3.67it/s] 35%|███▌ | 130658/371472 [10:23:13<18:36:33, 3.59it/s] 35%|███▌ | 130659/371472 [10:23:13<18:09:34, 3.68it/s] 35%|███▌ | 130660/371472 [10:23:13<18:17:42, 3.66it/s] {'loss': 3.1844, 'learning_rate': 6.837520739511733e-07, 'epoch': 5.63} + 35%|███▌ | 130660/371472 [10:23:13<18:17:42, 3.66it/s] 35%|███▌ | 130661/371472 [10:23:14<18:29:27, 3.62it/s] 35%|███▌ | 130662/371472 [10:23:14<18:36:39, 3.59it/s] 35%|███▌ | 130663/371472 [10:23:14<19:11:52, 3.48it/s] 35%|███▌ | 130664/371472 [10:23:15<21:05:00, 3.17it/s] 35%|███▌ | 130665/371472 [10:23:15<19:53:01, 3.36it/s] 35%|███▌ | 130666/371472 [10:23:15<19:04:16, 3.51it/s] 35%|███▌ | 130667/371472 [10:23:15<19:28:41, 3.43it/s] 35%|███▌ | 130668/371472 [10:23:16<18:59:28, 3.52it/s] 35%|███▌ | 130669/371472 [10:23:16<18:13:11, 3.67it/s] 35%|███▌ | 130670/371472 [10:23:16<18:56:27, 3.53it/s] 35%|███▌ | 130671/371472 [10:23:16<18:15:40, 3.66it/s] 35%|███▌ | 130672/371472 [10:23:17<18:28:02, 3.62it/s] 35%|███▌ | 130673/371472 [10:23:17<18:26:42, 3.63it/s] 35%|███▌ | 130674/371472 [10:23:17<18:34:10, 3.60it/s] 35%|███▌ | 130675/371472 [10:23:18<19:23:53, 3.45it/s] 35%|███▌ | 130676/371472 [10:23:18<18:50:37, 3.55it/s] 35%|███▌ | 130677/371472 [10:23:18<18:04:40, 3.70it/s] 35%|███▌ | 130678/371472 [10:23:18<19:35:50, 3.41it/s] 35%|███▌ | 130679/371472 [10:23:19<18:20:34, 3.65it/s] 35%|███▌ | 130680/371472 [10:23:19<17:31:20, 3.82it/s] {'loss': 3.116, 'learning_rate': 6.837035919756943e-07, 'epoch': 5.63} + 35%|███▌ | 130680/371472 [10:23:19<17:31:20, 3.82it/s] 35%|███▌ | 130681/371472 [10:23:19<17:43:56, 3.77it/s] 35%|███▌ | 130682/371472 [10:23:19<17:24:54, 3.84it/s] 35%|███▌ | 130683/371472 [10:23:20<18:10:45, 3.68it/s] 35%|███▌ | 130684/371472 [10:23:20<17:42:05, 3.78it/s] 35%|███▌ | 130685/371472 [10:23:20<17:42:00, 3.78it/s] 35%|███▌ | 130686/371472 [10:23:21<18:04:46, 3.70it/s] 35%|███▌ | 130687/371472 [10:23:21<17:55:18, 3.73it/s] 35%|███▌ | 130688/371472 [10:23:21<18:31:47, 3.61it/s] 35%|███▌ | 130689/371472 [10:23:21<17:49:36, 3.75it/s] 35%|███▌ | 130690/371472 [10:23:22<18:19:53, 3.65it/s] 35%|███▌ | 130691/371472 [10:23:22<18:10:43, 3.68it/s] 35%|███▌ | 130692/371472 [10:23:22<18:28:24, 3.62it/s] 35%|███▌ | 130693/371472 [10:23:22<18:04:10, 3.70it/s] 35%|███▌ | 130694/371472 [10:23:23<17:41:41, 3.78it/s] 35%|███▌ | 130695/371472 [10:23:23<18:06:05, 3.69it/s] 35%|███▌ | 130696/371472 [10:23:23<18:29:19, 3.62it/s] 35%|███▌ | 130697/371472 [10:23:24<18:27:39, 3.62it/s] 35%|███▌ | 130698/371472 [10:23:24<17:52:14, 3.74it/s] 35%|███▌ | 130699/371472 [10:23:24<17:56:29, 3.73it/s] 35%|███▌ | 130700/371472 [10:23:24<18:24:47, 3.63it/s] {'loss': 3.1303, 'learning_rate': 6.836551100002154e-07, 'epoch': 5.63} + 35%|███▌ | 130700/371472 [10:23:24<18:24:47, 3.63it/s] 35%|███▌ | 130701/371472 [10:23:25<17:41:10, 3.78it/s] 35%|███▌ | 130702/371472 [10:23:25<17:31:38, 3.82it/s] 35%|███▌ | 130703/371472 [10:23:25<19:25:12, 3.44it/s] 35%|███▌ | 130704/371472 [10:23:25<18:30:50, 3.61it/s] 35%|███▌ | 130705/371472 [10:23:26<18:15:51, 3.66it/s] 35%|███▌ | 130706/371472 [10:23:26<19:52:23, 3.37it/s] 35%|███▌ | 130707/371472 [10:23:26<20:01:37, 3.34it/s] 35%|███▌ | 130708/371472 [10:23:27<20:03:02, 3.34it/s] 35%|███▌ | 130709/371472 [10:23:27<20:38:27, 3.24it/s] 35%|███▌ | 130710/371472 [10:23:27<19:58:28, 3.35it/s] 35%|███▌ | 130711/371472 [10:23:28<19:40:56, 3.40it/s] 35%|███▌ | 130712/371472 [10:23:28<18:47:15, 3.56it/s] 35%|███▌ | 130713/371472 [10:23:28<19:11:19, 3.49it/s] 35%|███▌ | 130714/371472 [10:23:28<19:03:14, 3.51it/s] 35%|███▌ | 130715/371472 [10:23:29<19:06:16, 3.50it/s] 35%|███▌ | 130716/371472 [10:23:29<19:58:50, 3.35it/s] 35%|███▌ | 130717/371472 [10:23:29<20:10:09, 3.32it/s] 35%|███▌ | 130718/371472 [10:23:30<21:46:19, 3.07it/s] 35%|███▌ | 130719/371472 [10:23:30<21:10:12, 3.16it/s] 35%|███▌ | 130720/371472 [10:23:30<19:44:33, 3.39it/s] {'loss': 3.1137, 'learning_rate': 6.836066280247366e-07, 'epoch': 5.63} + 35%|███▌ | 130720/371472 [10:23:30<19:44:33, 3.39it/s] 35%|███▌ | 130721/371472 [10:23:31<19:48:24, 3.38it/s] 35%|███▌ | 130722/371472 [10:23:31<19:26:01, 3.44it/s] 35%|███▌ | 130723/371472 [10:23:31<18:22:49, 3.64it/s] 35%|███▌ | 130724/371472 [10:23:31<19:35:25, 3.41it/s] 35%|███▌ | 130725/371472 [10:23:32<19:33:04, 3.42it/s] 35%|███▌ | 130726/371472 [10:23:32<19:24:34, 3.45it/s] 35%|███▌ | 130727/371472 [10:23:32<19:10:11, 3.49it/s] 35%|███▌ | 130728/371472 [10:23:33<18:49:27, 3.55it/s] 35%|███▌ | 130729/371472 [10:23:33<18:36:38, 3.59it/s] 35%|███▌ | 130730/371472 [10:23:33<18:16:34, 3.66it/s] 35%|███▌ | 130731/371472 [10:23:33<18:33:21, 3.60it/s] 35%|███▌ | 130732/371472 [10:23:34<17:39:30, 3.79it/s] 35%|███▌ | 130733/371472 [10:23:34<16:58:52, 3.94it/s] 35%|███▌ | 130734/371472 [10:23:34<16:29:50, 4.05it/s] 35%|███▌ | 130735/371472 [10:23:34<17:01:01, 3.93it/s] 35%|███▌ | 130736/371472 [10:23:35<18:09:32, 3.68it/s] 35%|███▌ | 130737/371472 [10:23:35<17:49:34, 3.75it/s] 35%|███▌ | 130738/371472 [10:23:35<17:37:45, 3.79it/s] 35%|███▌ | 130739/371472 [10:23:35<17:58:23, 3.72it/s] 35%|███▌ | 130740/371472 [10:23:36<18:14:26, 3.67it/s] {'loss': 3.2246, 'learning_rate': 6.835581460492577e-07, 'epoch': 5.63} + 35%|███▌ | 130740/371472 [10:23:36<18:14:26, 3.67it/s] 35%|███▌ | 130741/371472 [10:23:36<17:36:47, 3.80it/s] 35%|███▌ | 130742/371472 [10:23:36<17:36:45, 3.80it/s] 35%|███▌ | 130743/371472 [10:23:36<17:49:46, 3.75it/s] 35%|███▌ | 130744/371472 [10:23:37<17:37:23, 3.79it/s] 35%|███▌ | 130745/371472 [10:23:37<16:52:09, 3.96it/s] 35%|███▌ | 130746/371472 [10:23:37<16:42:04, 4.00it/s] 35%|███▌ | 130747/371472 [10:23:37<16:49:10, 3.98it/s] 35%|███▌ | 130748/371472 [10:23:38<16:51:21, 3.97it/s] 35%|███▌ | 130749/371472 [10:23:38<17:06:29, 3.91it/s] 35%|███▌ | 130750/371472 [10:23:38<18:23:14, 3.64it/s] 35%|███▌ | 130751/371472 [10:23:39<17:52:57, 3.74it/s] 35%|███▌ | 130752/371472 [10:23:39<18:09:36, 3.68it/s] 35%|███▌ | 130753/371472 [10:23:39<18:56:16, 3.53it/s] 35%|███▌ | 130754/371472 [10:23:39<19:22:04, 3.45it/s] 35%|███▌ | 130755/371472 [10:23:40<19:11:23, 3.48it/s] 35%|███▌ | 130756/371472 [10:23:40<19:09:59, 3.49it/s] 35%|███▌ | 130757/371472 [10:23:40<20:23:44, 3.28it/s] 35%|███▌ | 130758/371472 [10:23:41<20:44:13, 3.22it/s] 35%|███▌ | 130759/371472 [10:23:41<19:51:30, 3.37it/s] 35%|███▌ | 130760/371472 [10:23:41<19:29:57, 3.43it/s] {'loss': 3.2303, 'learning_rate': 6.835096640737789e-07, 'epoch': 5.63} + 35%|███▌ | 130760/371472 [10:23:41<19:29:57, 3.43it/s] 35%|███▌ | 130761/371472 [10:23:41<18:20:45, 3.64it/s] 35%|███▌ | 130762/371472 [10:23:42<19:36:05, 3.41it/s] 35%|███▌ | 130763/371472 [10:23:42<19:17:43, 3.47it/s] 35%|███▌ | 130764/371472 [10:23:42<19:09:24, 3.49it/s] 35%|███▌ | 130765/371472 [10:23:43<19:20:54, 3.46it/s] 35%|███▌ | 130766/371472 [10:23:43<19:11:38, 3.48it/s] 35%|███▌ | 130767/371472 [10:23:43<18:21:29, 3.64it/s] 35%|███▌ | 130768/371472 [10:23:43<18:24:05, 3.63it/s] 35%|███▌ | 130769/371472 [10:23:44<18:10:07, 3.68it/s] 35%|███▌ | 130770/371472 [10:23:44<18:11:54, 3.67it/s] 35%|███▌ | 130771/371472 [10:23:44<18:38:29, 3.59it/s] 35%|███▌ | 130772/371472 [10:23:45<19:26:42, 3.44it/s] 35%|███▌ | 130773/371472 [10:23:45<18:23:51, 3.63it/s] 35%|███▌ | 130774/371472 [10:23:45<19:00:28, 3.52it/s] 35%|███▌ | 130775/371472 [10:23:45<18:44:38, 3.57it/s] 35%|███▌ | 130776/371472 [10:23:46<18:26:57, 3.62it/s] 35%|███▌ | 130777/371472 [10:23:46<18:35:21, 3.60it/s] 35%|███▌ | 130778/371472 [10:23:46<20:01:17, 3.34it/s] 35%|███▌ | 130779/371472 [10:23:47<19:57:38, 3.35it/s] 35%|███▌ | 130780/371472 [10:23:47<19:03:06, 3.51it/s] {'loss': 3.1437, 'learning_rate': 6.834611820982999e-07, 'epoch': 5.63} + 35%|███▌ | 130780/371472 [10:23:47<19:03:06, 3.51it/s] 35%|███▌ | 130781/371472 [10:23:47<18:45:58, 3.56it/s] 35%|███▌ | 130782/371472 [10:23:47<17:54:19, 3.73it/s] 35%|███▌ | 130783/371472 [10:23:48<19:01:57, 3.51it/s] 35%|███▌ | 130784/371472 [10:23:48<18:24:19, 3.63it/s] 35%|███▌ | 130785/371472 [10:23:48<19:40:50, 3.40it/s] 35%|███▌ | 130786/371472 [10:23:49<19:39:58, 3.40it/s] 35%|███▌ | 130787/371472 [10:23:49<19:47:23, 3.38it/s] 35%|███▌ | 130788/371472 [10:23:49<19:23:42, 3.45it/s] 35%|███▌ | 130789/371472 [10:23:50<20:36:17, 3.24it/s] 35%|███▌ | 130790/371472 [10:23:50<21:54:49, 3.05it/s] 35%|███▌ | 130791/371472 [10:23:50<21:48:15, 3.07it/s] 35%|███▌ | 130792/371472 [10:23:50<20:15:32, 3.30it/s] 35%|███▌ | 130793/371472 [10:23:51<19:19:17, 3.46it/s] 35%|███▌ | 130794/371472 [10:23:51<18:11:05, 3.68it/s] 35%|███▌ | 130795/371472 [10:23:51<20:02:59, 3.33it/s] 35%|███▌ | 130796/371472 [10:23:52<18:43:43, 3.57it/s] 35%|███▌ | 130797/371472 [10:23:52<19:14:16, 3.48it/s] 35%|███▌ | 130798/371472 [10:23:52<18:19:12, 3.65it/s] 35%|███▌ | 130799/371472 [10:23:52<18:50:48, 3.55it/s] 35%|███▌ | 130800/371472 [10:23:53<19:08:37, 3.49it/s] {'loss': 3.2279, 'learning_rate': 6.834127001228209e-07, 'epoch': 5.63} + 35%|███▌ | 130800/371472 [10:23:53<19:08:37, 3.49it/s] 35%|███▌ | 130801/371472 [10:23:53<18:15:15, 3.66it/s] 35%|███▌ | 130802/371472 [10:23:53<18:39:12, 3.58it/s] 35%|███▌ | 130803/371472 [10:23:53<18:30:20, 3.61it/s] 35%|███▌ | 130804/371472 [10:23:54<17:58:13, 3.72it/s] 35%|███▌ | 130805/371472 [10:23:54<18:29:35, 3.61it/s] 35%|███▌ | 130806/371472 [10:23:54<19:19:37, 3.46it/s] 35%|███▌ | 130807/371472 [10:23:55<18:48:07, 3.56it/s] 35%|███▌ | 130808/371472 [10:23:55<20:18:42, 3.29it/s] 35%|███▌ | 130809/371472 [10:23:55<19:18:55, 3.46it/s] 35%|███▌ | 130810/371472 [10:23:55<18:43:05, 3.57it/s] 35%|███▌ | 130811/371472 [10:23:56<18:50:14, 3.55it/s] 35%|███▌ | 130812/371472 [10:23:56<18:20:07, 3.65it/s] 35%|███▌ | 130813/371472 [10:23:56<17:39:54, 3.78it/s] 35%|███▌ | 130814/371472 [10:23:57<17:20:24, 3.86it/s] 35%|███▌ | 130815/371472 [10:23:57<17:48:41, 3.75it/s] 35%|███▌ | 130816/371472 [10:23:57<19:07:13, 3.50it/s] 35%|███▌ | 130817/371472 [10:23:57<18:21:46, 3.64it/s] 35%|███▌ | 130818/371472 [10:23:58<18:03:47, 3.70it/s] 35%|███▌ | 130819/371472 [10:23:58<18:44:36, 3.57it/s] 35%|███▌ | 130820/371472 [10:23:58<18:11:55, 3.67it/s] {'loss': 3.2933, 'learning_rate': 6.833642181473421e-07, 'epoch': 5.63} + 35%|███▌ | 130820/371472 [10:23:58<18:11:55, 3.67it/s] 35%|███▌ | 130821/371472 [10:23:58<17:44:55, 3.77it/s] 35%|███▌ | 130822/371472 [10:23:59<17:00:15, 3.93it/s] 35%|███▌ | 130823/371472 [10:23:59<18:52:18, 3.54it/s] 35%|███▌ | 130824/371472 [10:23:59<22:14:34, 3.01it/s] 35%|███▌ | 130825/371472 [10:24:00<20:42:16, 3.23it/s] 35%|███▌ | 130826/371472 [10:24:00<19:39:55, 3.40it/s] 35%|███▌ | 130827/371472 [10:24:00<19:00:02, 3.52it/s] 35%|███▌ | 130828/371472 [10:24:00<18:12:30, 3.67it/s] 35%|███▌ | 130829/371472 [10:24:01<19:01:54, 3.51it/s] 35%|███▌ | 130830/371472 [10:24:01<18:48:44, 3.55it/s] 35%|███▌ | 130831/371472 [10:24:01<18:15:50, 3.66it/s] 35%|███▌ | 130832/371472 [10:24:02<19:01:13, 3.51it/s] 35%|███▌ | 130833/371472 [10:24:02<19:48:40, 3.37it/s] 35%|███▌ | 130834/371472 [10:24:02<18:48:07, 3.56it/s] 35%|███▌ | 130835/371472 [10:24:03<19:04:16, 3.50it/s] 35%|███▌ | 130836/371472 [10:24:03<18:31:37, 3.61it/s] 35%|███▌ | 130837/371472 [10:24:03<17:53:45, 3.74it/s] 35%|███▌ | 130838/371472 [10:24:03<17:59:19, 3.72it/s] 35%|███▌ | 130839/371472 [10:24:04<17:33:10, 3.81it/s] 35%|███▌ | 130840/371472 [10:24:04<17:15:33, 3.87it/s] {'loss': 3.0662, 'learning_rate': 6.833157361718631e-07, 'epoch': 5.64} + 35%|███▌ | 130840/371472 [10:24:04<17:15:33, 3.87it/s] 35%|███▌ | 130841/371472 [10:24:04<17:58:12, 3.72it/s] 35%|███▌ | 130842/371472 [10:24:04<17:52:19, 3.74it/s] 35%|███▌ | 130843/371472 [10:24:05<18:24:47, 3.63it/s] 35%|███▌ | 130844/371472 [10:24:05<17:52:03, 3.74it/s] 35%|███▌ | 130845/371472 [10:24:05<17:54:38, 3.73it/s] 35%|███▌ | 130846/371472 [10:24:06<19:43:28, 3.39it/s] 35%|███▌ | 130847/371472 [10:24:06<20:01:32, 3.34it/s] 35%|███▌ | 130848/371472 [10:24:06<19:34:25, 3.41it/s] 35%|███▌ | 130849/371472 [10:24:06<18:43:12, 3.57it/s] 35%|███▌ | 130850/371472 [10:24:07<18:53:19, 3.54it/s] 35%|███▌ | 130851/371472 [10:24:07<18:21:12, 3.64it/s] 35%|███▌ | 130852/371472 [10:24:07<18:34:10, 3.60it/s] 35%|███▌ | 130853/371472 [10:24:07<18:20:45, 3.64it/s] 35%|███▌ | 130854/371472 [10:24:08<17:26:39, 3.83it/s] 35%|███▌ | 130855/371472 [10:24:08<18:00:12, 3.71it/s] 35%|███▌ | 130856/371472 [10:24:08<18:41:35, 3.58it/s] 35%|███▌ | 130857/371472 [10:24:09<18:33:13, 3.60it/s] 35%|███▌ | 130858/371472 [10:24:09<20:17:03, 3.30it/s] 35%|███▌ | 130859/371472 [10:24:09<19:11:15, 3.48it/s] 35%|███▌ | 130860/371472 [10:24:09<19:03:07, 3.51it/s] {'loss': 3.1152, 'learning_rate': 6.832672541963843e-07, 'epoch': 5.64} + 35%|███▌ | 130860/371472 [10:24:09<19:03:07, 3.51it/s] 35%|███▌ | 130861/371472 [10:24:10<18:13:03, 3.67it/s] 35%|███▌ | 130862/371472 [10:24:10<17:50:45, 3.75it/s] 35%|███▌ | 130863/371472 [10:24:10<18:35:52, 3.59it/s] 35%|███▌ | 130864/371472 [10:24:10<18:15:03, 3.66it/s] 35%|███▌ | 130865/371472 [10:24:11<17:55:26, 3.73it/s] 35%|███▌ | 130866/371472 [10:24:11<18:35:10, 3.60it/s] 35%|███▌ | 130867/371472 [10:24:11<18:25:34, 3.63it/s] 35%|███▌ | 130868/371472 [10:24:12<18:41:50, 3.57it/s] 35%|███▌ | 130869/371472 [10:24:12<18:21:19, 3.64it/s] 35%|███▌ | 130870/371472 [10:24:12<18:25:55, 3.63it/s] 35%|███▌ | 130871/371472 [10:24:12<17:57:53, 3.72it/s] 35%|███▌ | 130872/371472 [10:24:13<18:23:19, 3.63it/s] 35%|███▌ | 130873/371472 [10:24:13<18:04:13, 3.70it/s] 35%|███▌ | 130874/371472 [10:24:13<18:12:40, 3.67it/s] 35%|███▌ | 130875/371472 [10:24:14<19:16:44, 3.47it/s] 35%|███▌ | 130876/371472 [10:24:14<19:49:40, 3.37it/s] 35%|███▌ | 130877/371472 [10:24:14<19:05:59, 3.50it/s] 35%|███▌ | 130878/371472 [10:24:14<18:25:27, 3.63it/s] 35%|███▌ | 130879/371472 [10:24:15<19:21:26, 3.45it/s] 35%|███▌ | 130880/371472 [10:24:15<20:24:17, 3.28it/s] {'loss': 3.1508, 'learning_rate': 6.832187722209054e-07, 'epoch': 5.64} + 35%|███▌ | 130880/371472 [10:24:15<20:24:17, 3.28it/s] 35%|███▌ | 130881/371472 [10:24:15<20:11:09, 3.31it/s] 35%|███▌ | 130882/371472 [10:24:16<19:18:27, 3.46it/s] 35%|███▌ | 130883/371472 [10:24:16<18:38:40, 3.58it/s] 35%|███▌ | 130884/371472 [10:24:16<21:41:05, 3.08it/s] 35%|███▌ | 130885/371472 [10:24:17<19:53:33, 3.36it/s] 35%|███▌ | 130886/371472 [10:24:17<19:11:02, 3.48it/s] 35%|███▌ | 130887/371472 [10:24:17<20:38:51, 3.24it/s] 35%|███▌ | 130888/371472 [10:24:17<20:12:09, 3.31it/s] 35%|███▌ | 130889/371472 [10:24:18<19:29:14, 3.43it/s] 35%|███▌ | 130890/371472 [10:24:18<23:22:03, 2.86it/s] 35%|███▌ | 130891/371472 [10:24:18<21:25:13, 3.12it/s] 35%|███▌ | 130892/371472 [10:24:19<20:39:43, 3.23it/s] 35%|███▌ | 130893/371472 [10:24:19<20:48:42, 3.21it/s] 35%|███▌ | 130894/371472 [10:24:19<19:42:16, 3.39it/s] 35%|███▌ | 130895/371472 [10:24:20<18:51:42, 3.54it/s] 35%|███▌ | 130896/371472 [10:24:20<19:08:51, 3.49it/s] 35%|███▌ | 130897/371472 [10:24:20<19:40:34, 3.40it/s] 35%|███▌ | 130898/371472 [10:24:20<19:18:46, 3.46it/s] 35%|███▌ | 130899/371472 [10:24:21<19:25:23, 3.44it/s] 35%|███▌ | 130900/371472 [10:24:21<18:40:00, 3.58it/s] {'loss': 3.2658, 'learning_rate': 6.831702902454265e-07, 'epoch': 5.64} + 35%|███▌ | 130900/371472 [10:24:21<18:40:00, 3.58it/s] 35%|███▌ | 130901/371472 [10:24:21<17:58:54, 3.72it/s] 35%|███▌ | 130902/371472 [10:24:21<17:18:23, 3.86it/s] 35%|███▌ | 130903/371472 [10:24:22<17:22:13, 3.85it/s] 35%|███▌ | 130904/371472 [10:24:22<18:21:35, 3.64it/s] 35%|███▌ | 130905/371472 [10:24:22<18:15:33, 3.66it/s] 35%|███▌ | 130906/371472 [10:24:23<19:27:17, 3.43it/s] 35%|███▌ | 130907/371472 [10:24:23<20:05:46, 3.33it/s] 35%|███▌ | 130908/371472 [10:24:23<19:00:22, 3.52it/s] 35%|███▌ | 130909/371472 [10:24:23<18:35:27, 3.59it/s] 35%|███▌ | 130910/371472 [10:24:24<18:37:31, 3.59it/s] 35%|███▌ | 130911/371472 [10:24:24<20:52:12, 3.20it/s] 35%|███▌ | 130912/371472 [10:24:24<20:23:11, 3.28it/s] 35%|███▌ | 130913/371472 [10:24:25<20:49:27, 3.21it/s] 35%|███▌ | 130914/371472 [10:24:25<19:39:06, 3.40it/s] 35%|███▌ | 130915/371472 [10:24:25<18:55:09, 3.53it/s] 35%|███▌ | 130916/371472 [10:24:26<20:20:52, 3.28it/s] 35%|███▌ | 130917/371472 [10:24:26<18:56:51, 3.53it/s] 35%|███▌ | 130918/371472 [10:24:26<20:33:55, 3.25it/s] 35%|███▌ | 130919/371472 [10:24:26<19:42:07, 3.39it/s] 35%|███▌ | 130920/371472 [10:24:27<19:03:51, 3.50it/s] {'loss': 3.1537, 'learning_rate': 6.831218082699476e-07, 'epoch': 5.64} + 35%|███▌ | 130920/371472 [10:24:27<19:03:51, 3.50it/s] 35%|███▌ | 130921/371472 [10:24:27<19:10:53, 3.48it/s] 35%|███▌ | 130922/371472 [10:24:27<18:26:33, 3.62it/s] 35%|███▌ | 130923/371472 [10:24:28<21:02:41, 3.18it/s] 35%|███▌ | 130924/371472 [10:24:28<21:57:02, 3.04it/s] 35%|███▌ | 130925/371472 [10:24:28<21:12:44, 3.15it/s] 35%|███▌ | 130926/371472 [10:24:29<19:30:18, 3.43it/s] 35%|███▌ | 130927/371472 [10:24:29<18:51:53, 3.54it/s] 35%|███▌ | 130928/371472 [10:24:29<18:46:22, 3.56it/s] 35%|███▌ | 130929/371472 [10:24:29<17:59:48, 3.71it/s] 35%|███▌ | 130930/371472 [10:24:30<18:02:27, 3.70it/s] 35%|███▌ | 130931/371472 [10:24:30<18:36:44, 3.59it/s] 35%|███▌ | 130932/371472 [10:24:30<18:23:32, 3.63it/s] 35%|███▌ | 130933/371472 [10:24:30<17:37:22, 3.79it/s] 35%|███▌ | 130934/371472 [10:24:31<17:59:08, 3.71it/s] 35%|███▌ | 130935/371472 [10:24:31<17:49:45, 3.75it/s] 35%|███▌ | 130936/371472 [10:24:31<17:35:56, 3.80it/s] 35%|███▌ | 130937/371472 [10:24:32<18:11:12, 3.67it/s] 35%|███▌ | 130938/371472 [10:24:32<17:53:11, 3.74it/s] 35%|███▌ | 130939/371472 [10:24:32<18:14:34, 3.66it/s] 35%|███▌ | 130940/371472 [10:24:32<19:20:04, 3.46it/s] {'loss': 3.2088, 'learning_rate': 6.830733262944687e-07, 'epoch': 5.64} + 35%|███▌ | 130940/371472 [10:24:32<19:20:04, 3.46it/s] 35%|███▌ | 130941/371472 [10:24:33<19:43:05, 3.39it/s] 35%|███▌ | 130942/371472 [10:24:33<19:39:02, 3.40it/s] 35%|███▌ | 130943/371472 [10:24:33<18:38:28, 3.58it/s] 35%|███▌ | 130944/371472 [10:24:33<17:49:07, 3.75it/s] 35%|███▌ | 130945/371472 [10:24:34<17:52:49, 3.74it/s] 35%|███▌ | 130946/371472 [10:24:34<18:48:16, 3.55it/s] 35%|███▌ | 130947/371472 [10:24:34<18:24:10, 3.63it/s] 35%|███▌ | 130948/371472 [10:24:35<18:09:41, 3.68it/s] 35%|███▌ | 130949/371472 [10:24:35<18:37:49, 3.59it/s] 35%|███▌ | 130950/371472 [10:24:35<18:17:11, 3.65it/s] 35%|███▌ | 130951/371472 [10:24:35<17:41:35, 3.78it/s] 35%|███▌ | 130952/371472 [10:24:36<17:50:15, 3.75it/s] 35%|███▌ | 130953/371472 [10:24:36<18:58:04, 3.52it/s] 35%|███▌ | 130954/371472 [10:24:36<18:12:03, 3.67it/s] 35%|███▌ | 130955/371472 [10:24:36<17:54:36, 3.73it/s] 35%|███▌ | 130956/371472 [10:24:37<17:20:42, 3.85it/s] 35%|███▌ | 130957/371472 [10:24:37<17:03:28, 3.92it/s] 35%|███▌ | 130958/371472 [10:24:37<17:08:55, 3.90it/s] 35%|███▌ | 130959/371472 [10:24:37<16:57:47, 3.94it/s] 35%|███▌ | 130960/371472 [10:24:38<17:41:14, 3.78it/s] {'loss': 3.0688, 'learning_rate': 6.830248443189898e-07, 'epoch': 5.64} + 35%|███▌ | 130960/371472 [10:24:38<17:41:14, 3.78it/s] 35%|███▌ | 130961/371472 [10:24:38<18:09:33, 3.68it/s] 35%|███▌ | 130962/371472 [10:24:38<18:08:50, 3.68it/s] 35%|███▌ | 130963/371472 [10:24:39<18:31:11, 3.61it/s] 35%|███▌ | 130964/371472 [10:24:39<21:21:51, 3.13it/s] 35%|███▌ | 130965/371472 [10:24:39<20:10:16, 3.31it/s] 35%|███▌ | 130966/371472 [10:24:40<19:09:31, 3.49it/s] 35%|███▌ | 130967/371472 [10:24:40<19:14:21, 3.47it/s] 35%|███▌ | 130968/371472 [10:24:40<20:30:32, 3.26it/s] 35%|███▌ | 130969/371472 [10:24:41<22:03:47, 3.03it/s] 35%|███▌ | 130970/371472 [10:24:41<21:25:01, 3.12it/s] 35%|███▌ | 130971/371472 [10:24:41<19:55:44, 3.35it/s] 35%|███▌ | 130972/371472 [10:24:41<19:02:30, 3.51it/s] 35%|███▌ | 130973/371472 [10:24:42<18:51:14, 3.54it/s] 35%|███▌ | 130974/371472 [10:24:42<18:33:34, 3.60it/s] 35%|███▌ | 130975/371472 [10:24:42<17:43:03, 3.77it/s] 35%|███▌ | 130976/371472 [10:24:42<18:37:19, 3.59it/s] 35%|███▌ | 130977/371472 [10:24:43<18:11:52, 3.67it/s] 35%|███▌ | 130978/371472 [10:24:43<17:55:24, 3.73it/s] 35%|███▌ | 130979/371472 [10:24:43<18:21:57, 3.64it/s] 35%|███▌ | 130980/371472 [10:24:44<18:07:42, 3.69it/s] {'loss': 3.2585, 'learning_rate': 6.829763623435109e-07, 'epoch': 5.64} + 35%|███▌ | 130980/371472 [10:24:44<18:07:42, 3.69it/s] 35%|███▌ | 130981/371472 [10:24:44<17:35:05, 3.80it/s] 35%|███▌ | 130982/371472 [10:24:44<17:48:48, 3.75it/s] 35%|███▌ | 130983/371472 [10:24:44<17:48:14, 3.75it/s] 35%|███▌ | 130984/371472 [10:24:45<18:14:09, 3.66it/s] 35%|███▌ | 130985/371472 [10:24:45<17:44:51, 3.76it/s] 35%|███▌ | 130986/371472 [10:24:45<18:04:57, 3.69it/s] 35%|███▌ | 130987/371472 [10:24:45<19:08:36, 3.49it/s] 35%|███▌ | 130988/371472 [10:24:46<19:22:15, 3.45it/s] 35%|███▌ | 130989/371472 [10:24:46<18:25:12, 3.63it/s] 35%|███▌ | 130990/371472 [10:24:46<18:05:32, 3.69it/s] 35%|███▌ | 130991/371472 [10:24:47<18:02:21, 3.70it/s] 35%|███▌ | 130992/371472 [10:24:47<19:01:26, 3.51it/s] 35%|███▌ | 130993/371472 [10:24:47<18:43:08, 3.57it/s] 35%|███▌ | 130994/371472 [10:24:47<19:55:48, 3.35it/s] 35%|███▌ | 130995/371472 [10:24:48<19:38:54, 3.40it/s] 35%|███▌ | 130996/371472 [10:24:48<20:08:40, 3.32it/s] 35%|███▌ | 130997/371472 [10:24:48<20:39:54, 3.23it/s] 35%|███▌ | 130998/371472 [10:24:49<19:58:28, 3.34it/s] 35%|███▌ | 130999/371472 [10:24:49<19:09:53, 3.49it/s] 35%|███▌ | 131000/371472 [10:24:49<18:39:48, 3.58it/s] {'loss': 3.2439, 'learning_rate': 6.82927880368032e-07, 'epoch': 5.64} + 35%|███▌ | 131000/371472 [10:24:49<18:39:48, 3.58it/s] 35%|███▌ | 131001/371472 [10:24:49<19:05:49, 3.50it/s] 35%|███▌ | 131002/371472 [10:24:50<19:18:07, 3.46it/s] 35%|███▌ | 131003/371472 [10:24:50<18:22:56, 3.63it/s] 35%|███▌ | 131004/371472 [10:24:50<18:50:12, 3.55it/s] 35%|███▌ | 131005/371472 [10:24:51<17:55:08, 3.73it/s] 35%|███▌ | 131006/371472 [10:24:51<20:57:05, 3.19it/s] 35%|███▌ | 131007/371472 [10:24:51<20:03:17, 3.33it/s] 35%|███▌ | 131008/371472 [10:24:52<19:29:58, 3.43it/s] 35%|███▌ | 131009/371472 [10:24:52<19:41:48, 3.39it/s] 35%|███▌ | 131010/371472 [10:24:52<19:20:32, 3.45it/s] 35%|███▌ | 131011/371472 [10:24:52<20:36:49, 3.24it/s] 35%|███▌ | 131012/371472 [10:24:53<20:46:58, 3.21it/s] 35%|███▌ | 131013/371472 [10:24:53<19:41:35, 3.39it/s] 35%|███▌ | 131014/371472 [10:24:53<20:15:13, 3.30it/s] 35%|███▌ | 131015/371472 [10:24:54<19:44:21, 3.38it/s] 35%|███▌ | 131016/371472 [10:24:54<19:45:31, 3.38it/s] 35%|███▌ | 131017/371472 [10:24:54<19:15:40, 3.47it/s] 35%|███▌ | 131018/371472 [10:24:54<18:24:11, 3.63it/s] 35%|███▌ | 131019/371472 [10:24:55<18:51:30, 3.54it/s] 35%|███▌ | 131020/371472 [10:24:55<20:26:19, 3.27it/s] {'loss': 3.124, 'learning_rate': 6.828793983925532e-07, 'epoch': 5.64} + 35%|███▌ | 131020/371472 [10:24:55<20:26:19, 3.27it/s] 35%|███▌ | 131021/371472 [10:24:55<19:12:19, 3.48it/s] 35%|███▌ | 131022/371472 [10:24:56<19:01:54, 3.51it/s] 35%|███▌ | 131023/371472 [10:24:56<20:46:47, 3.21it/s] 35%|███▌ | 131024/371472 [10:24:56<20:29:23, 3.26it/s] 35%|███▌ | 131025/371472 [10:24:57<19:10:49, 3.48it/s] 35%|███▌ | 131026/371472 [10:24:57<19:02:12, 3.51it/s] 35%|███▌ | 131027/371472 [10:24:57<18:38:04, 3.58it/s] 35%|███▌ | 131028/371472 [10:24:57<18:55:12, 3.53it/s] 35%|███▌ | 131029/371472 [10:24:58<18:17:25, 3.65it/s] 35%|███▌ | 131030/371472 [10:24:58<17:33:01, 3.81it/s] 35%|███▌ | 131031/371472 [10:24:58<18:39:25, 3.58it/s] 35%|███▌ | 131032/371472 [10:24:58<18:13:29, 3.66it/s] 35%|███▌ | 131033/371472 [10:24:59<17:43:41, 3.77it/s] 35%|███▌ | 131034/371472 [10:24:59<18:18:21, 3.65it/s] 35%|███▌ | 131035/371472 [10:24:59<17:55:35, 3.73it/s] 35%|███▌ | 131036/371472 [10:25:00<18:16:58, 3.65it/s] 35%|███▌ | 131037/371472 [10:25:00<17:42:10, 3.77it/s] 35%|███▌ | 131038/371472 [10:25:00<17:15:17, 3.87it/s] 35%|███▌ | 131039/371472 [10:25:00<18:13:08, 3.67it/s] 35%|███▌ | 131040/371472 [10:25:01<18:04:16, 3.70it/s] {'loss': 3.0572, 'learning_rate': 6.828309164170743e-07, 'epoch': 5.64} + 35%|███▌ | 131040/371472 [10:25:01<18:04:16, 3.70it/s] 35%|███▌ | 131041/371472 [10:25:01<18:18:16, 3.65it/s] 35%|███▌ | 131042/371472 [10:25:01<18:28:02, 3.62it/s] 35%|███▌ | 131043/371472 [10:25:01<18:44:21, 3.56it/s] 35%|███▌ | 131044/371472 [10:25:02<18:24:26, 3.63it/s] 35%|███▌ | 131045/371472 [10:25:02<18:00:11, 3.71it/s] 35%|███▌ | 131046/371472 [10:25:02<19:22:42, 3.45it/s] 35%|███▌ | 131047/371472 [10:25:03<18:30:51, 3.61it/s] 35%|███▌ | 131048/371472 [10:25:03<18:55:35, 3.53it/s] 35%|███▌ | 131049/371472 [10:25:03<18:03:14, 3.70it/s] 35%|███▌ | 131050/371472 [10:25:03<19:07:14, 3.49it/s] 35%|███▌ | 131051/371472 [10:25:04<18:56:53, 3.52it/s] 35%|███▌ | 131052/371472 [10:25:04<18:39:35, 3.58it/s] 35%|███▌ | 131053/371472 [10:25:04<19:23:14, 3.44it/s] 35%|███▌ | 131054/371472 [10:25:05<18:48:50, 3.55it/s] 35%|███▌ | 131055/371472 [10:25:05<18:37:41, 3.59it/s] 35%|███▌ | 131056/371472 [10:25:05<17:34:15, 3.80it/s] 35%|███▌ | 131057/371472 [10:25:05<17:41:08, 3.78it/s] 35%|███▌ | 131058/371472 [10:25:06<18:50:44, 3.54it/s] 35%|███▌ | 131059/371472 [10:25:06<21:33:47, 3.10it/s] 35%|███▌ | 131060/371472 [10:25:06<20:39:51, 3.23it/s] {'loss': 3.1064, 'learning_rate': 6.827824344415953e-07, 'epoch': 5.65} + 35%|███▌ | 131060/371472 [10:25:06<20:39:51, 3.23it/s] 35%|███▌ | 131061/371472 [10:25:07<19:13:37, 3.47it/s] 35%|███▌ | 131062/371472 [10:25:07<18:25:08, 3.63it/s] 35%|███▌ | 131063/371472 [10:25:07<18:42:04, 3.57it/s] 35%|███▌ | 131064/371472 [10:25:07<18:32:16, 3.60it/s] 35%|███▌ | 131065/371472 [10:25:08<17:56:26, 3.72it/s] 35%|███▌ | 131066/371472 [10:25:08<17:49:12, 3.75it/s] 35%|███▌ | 131067/371472 [10:25:08<17:11:47, 3.88it/s] 35%|███▌ | 131068/371472 [10:25:08<18:23:51, 3.63it/s] 35%|███▌ | 131069/371472 [10:25:09<19:49:15, 3.37it/s] 35%|███▌ | 131070/371472 [10:25:09<26:09:07, 2.55it/s] 35%|███▌ | 131071/371472 [10:25:10<23:38:27, 2.82it/s] 35%|███▌ | 131072/371472 [10:25:10<21:51:20, 3.06it/s] 35%|███▌ | 131073/371472 [10:25:10<20:33:41, 3.25it/s] 35%|███▌ | 131074/371472 [10:25:10<19:14:45, 3.47it/s] 35%|███▌ | 131075/371472 [10:25:11<18:14:37, 3.66it/s] 35%|███▌ | 131076/371472 [10:25:11<18:28:29, 3.61it/s] 35%|███▌ | 131077/371472 [10:25:11<18:10:08, 3.68it/s] 35%|███▌ | 131078/371472 [10:25:11<17:31:38, 3.81it/s] 35%|███▌ | 131079/371472 [10:25:12<17:34:36, 3.80it/s] 35%|███▌ | 131080/371472 [10:25:12<20:07:21, 3.32it/s] {'loss': 3.3465, 'learning_rate': 6.827339524661165e-07, 'epoch': 5.65} + 35%|███▌ | 131080/371472 [10:25:12<20:07:21, 3.32it/s] 35%|███▌ | 131081/371472 [10:25:12<20:48:36, 3.21it/s] 35%|███▌ | 131082/371472 [10:25:13<20:09:41, 3.31it/s] 35%|███▌ | 131083/371472 [10:25:13<20:14:11, 3.30it/s] 35%|███▌ | 131084/371472 [10:25:13<19:30:11, 3.42it/s] 35%|███▌ | 131085/371472 [10:25:14<18:58:53, 3.52it/s] 35%|███▌ | 131086/371472 [10:25:14<18:47:28, 3.55it/s] 35%|███▌ | 131087/371472 [10:25:14<20:44:02, 3.22it/s] 35%|███▌ | 131088/371472 [10:25:14<19:15:31, 3.47it/s] 35%|███▌ | 131089/371472 [10:25:15<21:31:30, 3.10it/s] 35%|███▌ | 131090/371472 [10:25:15<19:40:58, 3.39it/s] 35%|███▌ | 131091/371472 [10:25:15<18:36:51, 3.59it/s] 35%|███▌ | 131092/371472 [10:25:16<19:11:51, 3.48it/s] 35%|███▌ | 131093/371472 [10:25:16<19:16:08, 3.47it/s] 35%|███▌ | 131094/371472 [10:25:16<18:38:00, 3.58it/s] 35%|███▌ | 131095/371472 [10:25:16<18:40:08, 3.58it/s] 35%|███▌ | 131096/371472 [10:25:17<19:49:14, 3.37it/s] 35%|███▌ | 131097/371472 [10:25:17<21:38:29, 3.09it/s] 35%|███▌ | 131098/371472 [10:25:17<20:30:38, 3.26it/s] 35%|███▌ | 131099/371472 [10:25:18<19:13:16, 3.47it/s] 35%|███▌ | 131100/371472 [10:25:18<19:53:10, 3.36it/s] {'loss': 3.1171, 'learning_rate': 6.826854704906375e-07, 'epoch': 5.65} + 35%|███▌ | 131100/371472 [10:25:18<19:53:10, 3.36it/s] 35%|███▌ | 131101/371472 [10:25:18<20:44:24, 3.22it/s] 35%|███▌ | 131102/371472 [10:25:19<21:09:33, 3.16it/s] 35%|███▌ | 131103/371472 [10:25:19<19:38:21, 3.40it/s] 35%|███▌ | 131104/371472 [10:25:19<18:51:37, 3.54it/s] 35%|███▌ | 131105/371472 [10:25:19<19:07:20, 3.49it/s] 35%|███▌ | 131106/371472 [10:25:20<18:42:21, 3.57it/s] 35%|███▌ | 131107/371472 [10:25:20<18:17:57, 3.65it/s] 35%|███▌ | 131108/371472 [10:25:20<17:44:39, 3.76it/s] 35%|███▌ | 131109/371472 [10:25:21<18:33:54, 3.60it/s] 35%|███▌ | 131110/371472 [10:25:21<18:13:41, 3.66it/s] 35%|███▌ | 131111/371472 [10:25:21<19:06:48, 3.49it/s] 35%|███▌ | 131112/371472 [10:25:21<18:35:06, 3.59it/s] 35%|███▌ | 131113/371472 [10:25:22<20:23:35, 3.27it/s] 35%|███▌ | 131114/371472 [10:25:22<19:22:52, 3.44it/s] 35%|███▌ | 131115/371472 [10:25:22<18:37:20, 3.59it/s] 35%|███▌ | 131116/371472 [10:25:23<18:22:39, 3.63it/s] 35%|███▌ | 131117/371472 [10:25:23<18:40:39, 3.57it/s] 35%|███▌ | 131118/371472 [10:25:23<17:55:56, 3.72it/s] 35%|███▌ | 131119/371472 [10:25:23<17:42:46, 3.77it/s] 35%|███▌ | 131120/371472 [10:25:24<19:13:26, 3.47it/s] {'loss': 3.3204, 'learning_rate': 6.826369885151587e-07, 'epoch': 5.65} + 35%|███▌ | 131120/371472 [10:25:24<19:13:26, 3.47it/s] 35%|███▌ | 131121/371472 [10:25:24<20:22:38, 3.28it/s] 35%|███▌ | 131122/371472 [10:25:24<21:07:42, 3.16it/s] 35%|███▌ | 131123/371472 [10:25:25<22:22:12, 2.98it/s] 35%|███▌ | 131124/371472 [10:25:25<21:19:48, 3.13it/s] 35%|███▌ | 131125/371472 [10:25:25<19:50:03, 3.37it/s] 35%|███▌ | 131126/371472 [10:25:26<20:39:21, 3.23it/s] 35%|███▌ | 131127/371472 [10:25:26<19:33:48, 3.41it/s] 35%|███▌ | 131128/371472 [10:25:26<18:48:08, 3.55it/s] 35%|███▌ | 131129/371472 [10:25:26<17:58:42, 3.71it/s] 35%|███▌ | 131130/371472 [10:25:27<18:05:08, 3.69it/s] 35%|███▌ | 131131/371472 [10:25:27<18:01:30, 3.70it/s] 35%|███▌ | 131132/371472 [10:25:27<17:57:28, 3.72it/s] 35%|███▌ | 131133/371472 [10:25:27<17:40:37, 3.78it/s] 35%|███▌ | 131134/371472 [10:25:28<17:32:31, 3.81it/s] 35%|███▌ | 131135/371472 [10:25:28<17:41:32, 3.77it/s] 35%|███▌ | 131136/371472 [10:25:28<17:46:30, 3.76it/s] 35%|███▌ | 131137/371472 [10:25:28<17:39:31, 3.78it/s] 35%|███▌ | 131138/371472 [10:25:29<17:36:58, 3.79it/s] 35%|███▌ | 131139/371472 [10:25:29<18:12:41, 3.67it/s] 35%|███▌ | 131140/371472 [10:25:29<18:28:14, 3.61it/s] {'loss': 3.0589, 'learning_rate': 6.825885065396798e-07, 'epoch': 5.65} + 35%|███▌ | 131140/371472 [10:25:29<18:28:14, 3.61it/s] 35%|███▌ | 131141/371472 [10:25:30<18:02:23, 3.70it/s] 35%|███▌ | 131142/371472 [10:25:30<18:35:49, 3.59it/s] 35%|███▌ | 131143/371472 [10:25:30<19:16:39, 3.46it/s] 35%|███▌ | 131144/371472 [10:25:30<18:52:28, 3.54it/s] 35%|███▌ | 131145/371472 [10:25:31<18:24:05, 3.63it/s] 35%|███▌ | 131146/371472 [10:25:31<18:04:12, 3.69it/s] 35%|███▌ | 131147/371472 [10:25:31<19:18:13, 3.46it/s] 35%|███▌ | 131148/371472 [10:25:32<18:29:28, 3.61it/s] 35%|███▌ | 131149/371472 [10:25:32<19:34:46, 3.41it/s] 35%|███▌ | 131150/371472 [10:25:32<19:24:57, 3.44it/s] 35%|███▌ | 131151/371472 [10:25:32<18:45:32, 3.56it/s] 35%|███▌ | 131152/371472 [10:25:33<18:11:37, 3.67it/s] 35%|███▌ | 131153/371472 [10:25:33<18:08:10, 3.68it/s] 35%|███▌ | 131154/371472 [10:25:33<18:08:34, 3.68it/s] 35%|███▌ | 131155/371472 [10:25:33<18:03:20, 3.70it/s] 35%|███▌ | 131156/371472 [10:25:34<18:01:06, 3.70it/s] 35%|███▌ | 131157/371472 [10:25:34<18:55:31, 3.53it/s] 35%|███▌ | 131158/371472 [10:25:34<18:55:59, 3.53it/s] 35%|███▌ | 131159/371472 [10:25:35<18:26:25, 3.62it/s] 35%|███▌ | 131160/371472 [10:25:35<20:56:15, 3.19it/s] {'loss': 3.1474, 'learning_rate': 6.825400245642009e-07, 'epoch': 5.65} + 35%|███▌ | 131160/371472 [10:25:35<20:56:15, 3.19it/s] 35%|███▌ | 131161/371472 [10:25:35<20:45:10, 3.22it/s] 35%|███▌ | 131162/371472 [10:25:36<19:21:30, 3.45it/s] 35%|███▌ | 131163/371472 [10:25:36<18:57:14, 3.52it/s] 35%|███▌ | 131164/371472 [10:25:36<18:18:19, 3.65it/s] 35%|███▌ | 131165/371472 [10:25:36<19:08:32, 3.49it/s] 35%|███▌ | 131166/371472 [10:25:37<20:06:08, 3.32it/s] 35%|███▌ | 131167/371472 [10:25:37<20:42:24, 3.22it/s] 35%|███▌ | 131168/371472 [10:25:37<19:22:47, 3.44it/s] 35%|███▌ | 131169/371472 [10:25:38<19:39:39, 3.40it/s] 35%|███▌ | 131170/371472 [10:25:38<19:13:31, 3.47it/s] 35%|███▌ | 131171/371472 [10:25:38<18:46:14, 3.56it/s] 35%|███▌ | 131172/371472 [10:25:38<18:13:56, 3.66it/s] 35%|███▌ | 131173/371472 [10:25:39<19:03:55, 3.50it/s] 35%|███▌ | 131174/371472 [10:25:39<19:15:09, 3.47it/s] 35%|███▌ | 131175/371472 [10:25:39<20:16:02, 3.29it/s] 35%|███▌ | 131176/371472 [10:25:40<20:09:45, 3.31it/s] 35%|███▌ | 131177/371472 [10:25:40<19:13:15, 3.47it/s] 35%|███▌ | 131178/371472 [10:25:40<18:47:30, 3.55it/s] 35%|███▌ | 131179/371472 [10:25:40<18:06:55, 3.68it/s] 35%|███▌ | 131180/371472 [10:25:41<18:45:17, 3.56it/s] {'loss': 3.2572, 'learning_rate': 6.824915425887219e-07, 'epoch': 5.65} + 35%|███▌ | 131180/371472 [10:25:41<18:45:17, 3.56it/s] 35%|███▌ | 131181/371472 [10:25:41<18:28:54, 3.61it/s] 35%|███▌ | 131182/371472 [10:25:41<19:12:51, 3.47it/s] 35%|███▌ | 131183/371472 [10:25:42<20:03:42, 3.33it/s] 35%|███▌ | 131184/371472 [10:25:42<19:28:37, 3.43it/s] 35%|███▌ | 131185/371472 [10:25:42<21:44:29, 3.07it/s] 35%|███▌ | 131186/371472 [10:25:43<20:26:18, 3.27it/s] 35%|███▌ | 131187/371472 [10:25:43<20:09:08, 3.31it/s] 35%|███▌ | 131188/371472 [10:25:43<19:11:33, 3.48it/s] 35%|███▌ | 131189/371472 [10:25:43<18:43:26, 3.56it/s] 35%|███▌ | 131190/371472 [10:25:44<18:01:11, 3.70it/s] 35%|███▌ | 131191/371472 [10:25:44<17:51:43, 3.74it/s] 35%|███▌ | 131192/371472 [10:25:44<17:20:14, 3.85it/s] 35%|███▌ | 131193/371472 [10:25:44<16:57:20, 3.94it/s] 35%|███▌ | 131194/371472 [10:25:45<17:26:25, 3.83it/s] 35%|███▌ | 131195/371472 [10:25:45<17:53:43, 3.73it/s] 35%|███▌ | 131196/371472 [10:25:45<19:38:56, 3.40it/s] 35%|███▌ | 131197/371472 [10:25:46<19:07:29, 3.49it/s] 35%|███▌ | 131198/371472 [10:25:46<18:55:03, 3.53it/s] 35%|███▌ | 131199/371472 [10:25:46<18:22:12, 3.63it/s] 35%|███▌ | 131200/371472 [10:25:46<17:55:00, 3.73it/s] {'loss': 2.9921, 'learning_rate': 6.824430606132431e-07, 'epoch': 5.65} + 35%|███▌ | 131200/371472 [10:25:46<17:55:00, 3.73it/s] 35%|███▌ | 131201/371472 [10:25:47<20:02:27, 3.33it/s] 35%|███▌ | 131202/371472 [10:25:47<20:33:13, 3.25it/s] 35%|███▌ | 131203/371472 [10:25:47<18:59:59, 3.51it/s] 35%|███▌ | 131204/371472 [10:25:48<18:32:30, 3.60it/s] 35%|███▌ | 131205/371472 [10:25:48<18:50:31, 3.54it/s] 35%|███▌ | 131206/371472 [10:25:48<18:04:56, 3.69it/s] 35%|███▌ | 131207/371472 [10:25:48<17:48:05, 3.75it/s] 35%|███▌ | 131208/371472 [10:25:49<18:55:49, 3.53it/s] 35%|███▌ | 131209/371472 [10:25:49<18:29:39, 3.61it/s] 35%|███▌ | 131210/371472 [10:25:49<18:34:14, 3.59it/s] 35%|███▌ | 131211/371472 [10:25:49<17:56:23, 3.72it/s] 35%|███▌ | 131212/371472 [10:25:50<17:54:54, 3.73it/s] 35%|███▌ | 131213/371472 [10:25:50<18:25:10, 3.62it/s] 35%|███▌ | 131214/371472 [10:25:50<19:28:10, 3.43it/s] 35%|███▌ | 131215/371472 [10:25:51<20:03:25, 3.33it/s] 35%|███▌ | 131216/371472 [10:25:51<18:51:32, 3.54it/s] 35%|███▌ | 131217/371472 [10:25:51<18:20:53, 3.64it/s] 35%|███▌ | 131218/371472 [10:25:51<17:21:39, 3.84it/s] 35%|███▌ | 131219/371472 [10:25:52<17:08:51, 3.89it/s] 35%|███▌ | 131220/371472 [10:25:52<17:45:04, 3.76it/s] {'loss': 3.2725, 'learning_rate': 6.823945786377642e-07, 'epoch': 5.65} + 35%|███▌ | 131220/371472 [10:25:52<17:45:04, 3.76it/s] 35%|███▌ | 131221/371472 [10:25:52<17:53:25, 3.73it/s] 35%|███▌ | 131222/371472 [10:25:52<17:50:45, 3.74it/s] 35%|███▌ | 131223/371472 [10:25:53<18:11:26, 3.67it/s] 35%|███▌ | 131224/371472 [10:25:53<18:14:29, 3.66it/s] 35%|███▌ | 131225/371472 [10:25:53<19:34:21, 3.41it/s] 35%|███▌ | 131226/371472 [10:25:54<19:45:19, 3.38it/s] 35%|███▌ | 131227/371472 [10:25:54<19:00:00, 3.51it/s] 35%|███▌ | 131228/371472 [10:25:54<18:24:54, 3.62it/s] 35%|███▌ | 131229/371472 [10:25:54<17:43:43, 3.76it/s] 35%|███▌ | 131230/371472 [10:25:55<18:24:21, 3.63it/s] 35%|███▌ | 131231/371472 [10:25:55<17:50:10, 3.74it/s] 35%|███▌ | 131232/371472 [10:25:55<18:52:04, 3.54it/s] 35%|███▌ | 131233/371472 [10:25:56<19:06:58, 3.49it/s] 35%|███▌ | 131234/371472 [10:25:56<18:54:11, 3.53it/s] 35%|███▌ | 131235/371472 [10:25:56<18:50:31, 3.54it/s] 35%|███▌ | 131236/371472 [10:25:56<19:18:44, 3.46it/s] 35%|███▌ | 131237/371472 [10:25:57<19:50:48, 3.36it/s] 35%|███▌ | 131238/371472 [10:25:57<19:55:36, 3.35it/s] 35%|███▌ | 131239/371472 [10:25:57<18:46:47, 3.55it/s] 35%|███▌ | 131240/371472 [10:25:58<19:44:40, 3.38it/s] {'loss': 3.1645, 'learning_rate': 6.823460966622853e-07, 'epoch': 5.65} + 35%|███▌ | 131240/371472 [10:25:58<19:44:40, 3.38it/s] 35%|███▌ | 131241/371472 [10:25:58<20:02:53, 3.33it/s] 35%|███▌ | 131242/371472 [10:25:58<19:27:01, 3.43it/s] 35%|███▌ | 131243/371472 [10:25:58<18:34:27, 3.59it/s] 35%|███▌ | 131244/371472 [10:25:59<18:47:39, 3.55it/s] 35%|███▌ | 131245/371472 [10:25:59<18:05:40, 3.69it/s] 35%|███▌ | 131246/371472 [10:25:59<17:34:23, 3.80it/s] 35%|███▌ | 131247/371472 [10:25:59<17:38:54, 3.78it/s] 35%|███▌ | 131248/371472 [10:26:00<17:22:31, 3.84it/s] 35%|███▌ | 131249/371472 [10:26:00<18:34:59, 3.59it/s] 35%|███▌ | 131250/371472 [10:26:00<18:03:36, 3.69it/s] 35%|███▌ | 131251/371472 [10:26:01<17:32:39, 3.80it/s] 35%|███▌ | 131252/371472 [10:26:01<16:51:41, 3.96it/s] 35%|███▌ | 131253/371472 [10:26:01<17:33:32, 3.80it/s] 35%|███▌ | 131254/371472 [10:26:01<17:14:08, 3.87it/s] 35%|███▌ | 131255/371472 [10:26:02<17:32:33, 3.80it/s] 35%|███▌ | 131256/371472 [10:26:02<19:59:27, 3.34it/s] 35%|███▌ | 131257/371472 [10:26:02<19:19:20, 3.45it/s] 35%|███▌ | 131258/371472 [10:26:02<18:44:21, 3.56it/s] 35%|███▌ | 131259/371472 [10:26:03<18:35:48, 3.59it/s] 35%|███▌ | 131260/371472 [10:26:03<18:54:45, 3.53it/s] {'loss': 3.2603, 'learning_rate': 6.822976146868064e-07, 'epoch': 5.65} + 35%|███▌ | 131260/371472 [10:26:03<18:54:45, 3.53it/s] 35%|███▌ | 131261/371472 [10:26:03<18:55:04, 3.53it/s] 35%|███▌ | 131262/371472 [10:26:04<18:17:43, 3.65it/s] 35%|███▌ | 131263/371472 [10:26:04<19:54:42, 3.35it/s] 35%|███▌ | 131264/371472 [10:26:04<20:08:36, 3.31it/s] 35%|███▌ | 131265/371472 [10:26:05<19:16:52, 3.46it/s] 35%|███▌ | 131266/371472 [10:26:05<18:14:31, 3.66it/s] 35%|███▌ | 131267/371472 [10:26:05<18:45:13, 3.56it/s] 35%|███▌ | 131268/371472 [10:26:05<18:34:42, 3.59it/s] 35%|███▌ | 131269/371472 [10:26:06<19:21:45, 3.45it/s] 35%|███▌ | 131270/371472 [10:26:06<18:43:44, 3.56it/s] 35%|███▌ | 131271/371472 [10:26:06<18:36:51, 3.58it/s] 35%|███▌ | 131272/371472 [10:26:06<19:03:01, 3.50it/s] 35%|███▌ | 131273/371472 [10:26:07<18:07:02, 3.68it/s] 35%|███▌ | 131274/371472 [10:26:07<21:57:58, 3.04it/s] 35%|███▌ | 131275/371472 [10:26:07<21:15:52, 3.14it/s] 35%|███▌ | 131276/371472 [10:26:08<20:59:01, 3.18it/s] 35%|███▌ | 131277/371472 [10:26:08<21:30:56, 3.10it/s] 35%|███▌ | 131278/371472 [10:26:08<20:59:13, 3.18it/s] 35%|███▌ | 131279/371472 [10:26:09<19:52:14, 3.36it/s] 35%|███▌ | 131280/371472 [10:26:09<19:17:31, 3.46it/s] {'loss': 3.3358, 'learning_rate': 6.822491327113277e-07, 'epoch': 5.65} + 35%|███▌ | 131280/371472 [10:26:09<19:17:31, 3.46it/s] 35%|███▌ | 131281/371472 [10:26:09<18:26:55, 3.62it/s] 35%|███▌ | 131282/371472 [10:26:09<18:30:24, 3.61it/s] 35%|███▌ | 131283/371472 [10:26:10<18:18:00, 3.65it/s] 35%|███▌ | 131284/371472 [10:26:10<18:21:09, 3.64it/s] 35%|███▌ | 131285/371472 [10:26:10<18:44:04, 3.56it/s] 35%|███▌ | 131286/371472 [10:26:11<18:28:30, 3.61it/s] 35%|███▌ | 131287/371472 [10:26:11<18:29:49, 3.61it/s] 35%|███▌ | 131288/371472 [10:26:11<18:34:38, 3.59it/s] 35%|███▌ | 131289/371472 [10:26:11<17:57:26, 3.72it/s] 35%|███▌ | 131290/371472 [10:26:12<17:45:56, 3.76it/s] 35%|███▌ | 131291/371472 [10:26:12<18:19:01, 3.64it/s] 35%|███▌ | 131292/371472 [10:26:12<17:45:02, 3.76it/s] 35%|███▌ | 131293/371472 [10:26:12<17:28:46, 3.82it/s] 35%|███▌ | 131294/371472 [10:26:13<17:01:42, 3.92it/s] 35%|███▌ | 131295/371472 [10:26:13<17:24:34, 3.83it/s] 35%|███▌ | 131296/371472 [10:26:13<19:03:14, 3.50it/s] 35%|███▌ | 131297/371472 [10:26:14<18:12:19, 3.66it/s] 35%|███▌ | 131298/371472 [10:26:14<18:18:42, 3.64it/s] 35%|███▌ | 131299/371472 [10:26:14<18:08:43, 3.68it/s] 35%|███▌ | 131300/371472 [10:26:14<17:50:45, 3.74it/s] {'loss': 3.2891, 'learning_rate': 6.822006507358487e-07, 'epoch': 5.66} + 35%|███▌ | 131300/371472 [10:26:14<17:50:45, 3.74it/s] 35%|███▌ | 131301/371472 [10:26:15<18:34:13, 3.59it/s] 35%|███▌ | 131302/371472 [10:26:15<18:32:35, 3.60it/s] 35%|███▌ | 131303/371472 [10:26:15<18:27:02, 3.62it/s] 35%|███▌ | 131304/371472 [10:26:15<18:32:11, 3.60it/s] 35%|███▌ | 131305/371472 [10:26:16<18:47:12, 3.55it/s] 35%|███▌ | 131306/371472 [10:26:16<18:13:04, 3.66it/s] 35%|███▌ | 131307/371472 [10:26:16<18:13:36, 3.66it/s] 35%|███▌ | 131308/371472 [10:26:17<18:01:48, 3.70it/s] 35%|███▌ | 131309/371472 [10:26:17<18:11:52, 3.67it/s] 35%|███▌ | 131310/371472 [10:26:17<18:10:15, 3.67it/s] 35%|███▌ | 131311/371472 [10:26:17<18:18:26, 3.64it/s] 35%|███▌ | 131312/371472 [10:26:18<18:14:39, 3.66it/s] 35%|███▌ | 131313/371472 [10:26:18<17:51:28, 3.74it/s] 35%|███▌ | 131314/371472 [10:26:18<18:40:49, 3.57it/s] 35%|███▌ | 131315/371472 [10:26:18<17:57:37, 3.71it/s] 35%|███▌ | 131316/371472 [10:26:19<18:52:22, 3.53it/s] 35%|███▌ | 131317/371472 [10:26:19<18:24:32, 3.62it/s] 35%|███▌ | 131318/371472 [10:26:19<18:39:24, 3.58it/s] 35%|███▌ | 131319/371472 [10:26:20<18:49:42, 3.54it/s] 35%|███▌ | 131320/371472 [10:26:20<18:13:27, 3.66it/s] {'loss': 3.1934, 'learning_rate': 6.821521687603697e-07, 'epoch': 5.66} + 35%|███▌ | 131320/371472 [10:26:20<18:13:27, 3.66it/s] 35%|███▌ | 131321/371472 [10:26:20<19:58:28, 3.34it/s] 35%|███▌ | 131322/371472 [10:26:21<19:21:38, 3.45it/s] 35%|███▌ | 131323/371472 [10:26:21<18:23:15, 3.63it/s] 35%|███▌ | 131324/371472 [10:26:21<17:41:38, 3.77it/s] 35%|███▌ | 131325/371472 [10:26:21<18:25:38, 3.62it/s] 35%|███▌ | 131326/371472 [10:26:22<17:50:27, 3.74it/s] 35%|███▌ | 131327/371472 [10:26:22<18:27:41, 3.61it/s] 35%|███▌ | 131328/371472 [10:26:22<18:24:30, 3.62it/s] 35%|███▌ | 131329/371472 [10:26:22<18:47:18, 3.55it/s] 35%|███▌ | 131330/371472 [10:26:23<19:14:46, 3.47it/s] 35%|███▌ | 131331/371472 [10:26:23<21:30:24, 3.10it/s] 35%|███▌ | 131332/371472 [10:26:23<20:20:30, 3.28it/s] 35%|███▌ | 131333/371472 [10:26:24<21:01:04, 3.17it/s] 35%|███▌ | 131334/371472 [10:26:24<20:26:03, 3.26it/s] 35%|███▌ | 131335/371472 [10:26:24<19:19:31, 3.45it/s] 35%|███▌ | 131336/371472 [10:26:24<18:25:37, 3.62it/s] 35%|███▌ | 131337/371472 [10:26:25<18:16:33, 3.65it/s] 35%|███▌ | 131338/371472 [10:26:25<18:38:22, 3.58it/s] 35%|███▌ | 131339/371472 [10:26:25<19:08:30, 3.48it/s] 35%|███▌ | 131340/371472 [10:26:26<18:58:15, 3.52it/s] {'loss': 2.972, 'learning_rate': 6.821036867848908e-07, 'epoch': 5.66} + 35%|███▌ | 131340/371472 [10:26:26<18:58:15, 3.52it/s] 35%|███▌ | 131341/371472 [10:26:26<18:56:42, 3.52it/s] 35%|███▌ | 131342/371472 [10:26:26<18:23:57, 3.63it/s] 35%|███▌ | 131343/371472 [10:26:26<17:54:01, 3.73it/s] 35%|███▌ | 131344/371472 [10:26:27<18:14:34, 3.66it/s] 35%|███▌ | 131345/371472 [10:26:27<18:23:32, 3.63it/s] 35%|███▌ | 131346/371472 [10:26:27<19:08:43, 3.48it/s] 35%|███▌ | 131347/371472 [10:26:28<18:16:51, 3.65it/s] 35%|███▌ | 131348/371472 [10:26:28<18:29:34, 3.61it/s] 35%|███▌ | 131349/371472 [10:26:28<18:37:16, 3.58it/s] 35%|███▌ | 131350/371472 [10:26:28<18:05:50, 3.69it/s] 35%|███▌ | 131351/371472 [10:26:29<17:55:21, 3.72it/s] 35%|███▌ | 131352/371472 [10:26:29<17:53:34, 3.73it/s] 35%|███▌ | 131353/371472 [10:26:29<17:20:07, 3.85it/s] 35%|███▌ | 131354/371472 [10:26:29<16:45:32, 3.98it/s] 35%|███▌ | 131355/371472 [10:26:30<16:47:05, 3.97it/s] 35%|███▌ | 131356/371472 [10:26:30<16:38:20, 4.01it/s] 35%|███▌ | 131357/371472 [10:26:30<16:45:37, 3.98it/s] 35%|███▌ | 131358/371472 [10:26:30<17:06:39, 3.90it/s] 35%|███▌ | 131359/371472 [10:26:31<17:25:51, 3.83it/s] 35%|███▌ | 131360/371472 [10:26:31<18:46:07, 3.55it/s] {'loss': 3.233, 'learning_rate': 6.82055204809412e-07, 'epoch': 5.66} + 35%|███▌ | 131360/371472 [10:26:31<18:46:07, 3.55it/s] 35%|███▌ | 131361/371472 [10:26:31<19:04:34, 3.50it/s] 35%|███▌ | 131362/371472 [10:26:32<18:52:44, 3.53it/s] 35%|███▌ | 131363/371472 [10:26:32<19:12:33, 3.47it/s] 35%|███▌ | 131364/371472 [10:26:32<19:16:02, 3.46it/s] 35%|███▌ | 131365/371472 [10:26:32<19:39:29, 3.39it/s] 35%|███▌ | 131366/371472 [10:26:33<18:36:07, 3.59it/s] 35%|███▌ | 131367/371472 [10:26:33<18:32:04, 3.60it/s] 35%|███▌ | 131368/371472 [10:26:33<19:01:17, 3.51it/s] 35%|███▌ | 131369/371472 [10:26:34<18:58:17, 3.52it/s] 35%|███▌ | 131370/371472 [10:26:34<18:53:11, 3.53it/s] 35%|███▌ | 131371/371472 [10:26:34<20:32:40, 3.25it/s] 35%|███▌ | 131372/371472 [10:26:34<19:16:07, 3.46it/s] 35%|███▌ | 131373/371472 [10:26:35<19:33:30, 3.41it/s] 35%|███▌ | 131374/371472 [10:26:35<18:52:05, 3.53it/s] 35%|███▌ | 131375/371472 [10:26:35<19:19:55, 3.45it/s] 35%|███▌ | 131376/371472 [10:26:36<19:47:19, 3.37it/s] 35%|███▌ | 131377/371472 [10:26:36<19:30:45, 3.42it/s] 35%|███▌ | 131378/371472 [10:26:36<22:52:21, 2.92it/s] 35%|███▌ | 131379/371472 [10:26:37<21:19:14, 3.13it/s] 35%|███▌ | 131380/371472 [10:26:37<20:17:21, 3.29it/s] {'loss': 3.3083, 'learning_rate': 6.82006722833933e-07, 'epoch': 5.66} + 35%|███▌ | 131380/371472 [10:26:37<20:17:21, 3.29it/s] 35%|███▌ | 131381/371472 [10:26:37<19:43:05, 3.38it/s] 35%|███▌ | 131382/371472 [10:26:37<19:20:40, 3.45it/s] 35%|███▌ | 131383/371472 [10:26:38<18:40:36, 3.57it/s] 35%|███▌ | 131384/371472 [10:26:38<20:13:28, 3.30it/s] 35%|███▌ | 131385/371472 [10:26:38<19:49:29, 3.36it/s] 35%|███▌ | 131386/371472 [10:26:39<18:40:06, 3.57it/s] 35%|███▌ | 131387/371472 [10:26:39<18:31:49, 3.60it/s] 35%|███▌ | 131388/371472 [10:26:39<17:49:53, 3.74it/s] 35%|███▌ | 131389/371472 [10:26:39<17:49:16, 3.74it/s] 35%|███▌ | 131390/371472 [10:26:40<18:38:03, 3.58it/s] 35%|███▌ | 131391/371472 [10:26:40<19:06:53, 3.49it/s] 35%|███▌ | 131392/371472 [10:26:40<18:15:50, 3.65it/s] 35%|███▌ | 131393/371472 [10:26:41<17:53:43, 3.73it/s] 35%|███▌ | 131394/371472 [10:26:41<18:09:58, 3.67it/s] 35%|███▌ | 131395/371472 [10:26:41<17:38:19, 3.78it/s] 35%|███▌ | 131396/371472 [10:26:41<17:19:09, 3.85it/s] 35%|███▌ | 131397/371472 [10:26:42<17:56:35, 3.72it/s] 35%|███▌ | 131398/371472 [10:26:42<18:05:06, 3.69it/s] 35%|███▌ | 131399/371472 [10:26:42<17:51:17, 3.73it/s] 35%|███▌ | 131400/371472 [10:26:42<19:55:13, 3.35it/s] {'loss': 3.2913, 'learning_rate': 6.819582408584541e-07, 'epoch': 5.66} + 35%|███▌ | 131400/371472 [10:26:42<19:55:13, 3.35it/s] 35%|███▌ | 131401/371472 [10:26:43<19:04:43, 3.50it/s] 35%|███▌ | 131402/371472 [10:26:43<20:04:36, 3.32it/s] 35%|███▌ | 131403/371472 [10:26:43<19:39:54, 3.39it/s] 35%|███▌ | 131404/371472 [10:26:44<19:36:31, 3.40it/s] 35%|███▌ | 131405/371472 [10:26:44<19:46:31, 3.37it/s] 35%|███▌ | 131406/371472 [10:26:44<18:54:53, 3.53it/s] 35%|███▌ | 131407/371472 [10:26:44<18:57:14, 3.52it/s] 35%|███▌ | 131408/371472 [10:26:45<18:55:49, 3.52it/s] 35%|███▌ | 131409/371472 [10:26:45<19:00:14, 3.51it/s] 35%|███▌ | 131410/371472 [10:26:45<18:29:15, 3.61it/s] 35%|███▌ | 131411/371472 [10:26:46<18:16:08, 3.65it/s] 35%|███▌ | 131412/371472 [10:26:46<18:53:42, 3.53it/s] 35%|███▌ | 131413/371472 [10:26:46<18:18:40, 3.64it/s] 35%|███▌ | 131414/371472 [10:26:46<18:23:09, 3.63it/s] 35%|███▌ | 131415/371472 [10:26:47<17:44:42, 3.76it/s] 35%|███▌ | 131416/371472 [10:26:47<17:59:44, 3.71it/s] 35%|███▌ | 131417/371472 [10:26:47<18:11:57, 3.66it/s] 35%|███▌ | 131418/371472 [10:26:48<18:45:14, 3.56it/s] 35%|███▌ | 131419/371472 [10:26:48<18:10:02, 3.67it/s] 35%|███▌ | 131420/371472 [10:26:48<18:08:05, 3.68it/s] {'loss': 3.2096, 'learning_rate': 6.819097588829753e-07, 'epoch': 5.66} + 35%|███▌ | 131420/371472 [10:26:48<18:08:05, 3.68it/s] 35%|███▌ | 131421/371472 [10:26:48<18:28:04, 3.61it/s] 35%|███▌ | 131422/371472 [10:26:49<17:42:05, 3.77it/s] 35%|███▌ | 131423/371472 [10:26:49<17:25:56, 3.83it/s] 35%|███▌ | 131424/371472 [10:26:49<17:10:48, 3.88it/s] 35%|███▌ | 131425/371472 [10:26:49<17:58:01, 3.71it/s] 35%|███▌ | 131426/371472 [10:26:50<17:45:19, 3.76it/s] 35%|███▌ | 131427/371472 [10:26:50<17:19:17, 3.85it/s] 35%|███▌ | 131428/371472 [10:26:50<17:15:15, 3.86it/s] 35%|███▌ | 131429/371472 [10:26:51<19:40:07, 3.39it/s] 35%|███▌ | 131430/371472 [10:26:51<19:30:03, 3.42it/s] 35%|███▌ | 131431/371472 [10:26:51<19:49:16, 3.36it/s] 35%|███▌ | 131432/371472 [10:26:51<19:16:09, 3.46it/s] 35%|███▌ | 131433/371472 [10:26:52<18:45:09, 3.56it/s] 35%|███▌ | 131434/371472 [10:26:52<19:06:33, 3.49it/s] 35%|███▌ | 131435/371472 [10:26:52<19:48:31, 3.37it/s] 35%|███▌ | 131436/371472 [10:26:53<19:29:08, 3.42it/s] 35%|███▌ | 131437/371472 [10:26:53<18:33:00, 3.59it/s] 35%|███▌ | 131438/371472 [10:26:53<18:47:18, 3.55it/s] 35%|███▌ | 131439/371472 [10:26:53<18:33:44, 3.59it/s] 35%|███▌ | 131440/371472 [10:26:54<18:58:08, 3.51it/s] {'loss': 3.1739, 'learning_rate': 6.818612769074963e-07, 'epoch': 5.66} + 35%|███▌ | 131440/371472 [10:26:54<18:58:08, 3.51it/s] 35%|███▌ | 131441/371472 [10:26:54<20:22:59, 3.27it/s] 35%|███▌ | 131442/371472 [10:26:54<20:31:25, 3.25it/s] 35%|███▌ | 131443/371472 [10:26:55<20:21:12, 3.28it/s] 35%|███▌ | 131444/371472 [10:26:55<21:22:24, 3.12it/s] 35%|███▌ | 131445/371472 [10:26:55<21:39:49, 3.08it/s] 35%|███▌ | 131446/371472 [10:26:56<20:12:33, 3.30it/s] 35%|███▌ | 131447/371472 [10:26:56<20:03:27, 3.32it/s] 35%|███▌ | 131448/371472 [10:26:56<18:47:06, 3.55it/s] 35%|███▌ | 131449/371472 [10:26:56<18:44:44, 3.56it/s] 35%|███▌ | 131450/371472 [10:26:57<18:39:54, 3.57it/s] 35%|███▌ | 131451/371472 [10:26:57<18:41:00, 3.57it/s] 35%|███▌ | 131452/371472 [10:26:57<18:13:25, 3.66it/s] 35%|███▌ | 131453/371472 [10:26:58<20:05:24, 3.32it/s] 35%|███▌ | 131454/371472 [10:26:58<19:23:49, 3.44it/s] 35%|███▌ | 131455/371472 [10:26:58<19:10:34, 3.48it/s] 35%|███▌ | 131456/371472 [10:26:58<19:32:13, 3.41it/s] 35%|███▌ | 131457/371472 [10:26:59<20:55:26, 3.19it/s] 35%|███▌ | 131458/371472 [10:26:59<20:03:00, 3.33it/s] 35%|███▌ | 131459/371472 [10:26:59<19:58:09, 3.34it/s] 35%|███▌ | 131460/371472 [10:27:00<20:14:19, 3.29it/s] {'loss': 3.2092, 'learning_rate': 6.818127949320174e-07, 'epoch': 5.66} + 35%|███▌ | 131460/371472 [10:27:00<20:14:19, 3.29it/s] 35%|███▌ | 131461/371472 [10:27:00<19:23:24, 3.44it/s] 35%|███▌ | 131462/371472 [10:27:00<19:01:17, 3.50it/s] 35%|███▌ | 131463/371472 [10:27:01<20:17:14, 3.29it/s] 35%|███▌ | 131464/371472 [10:27:01<19:03:04, 3.50it/s] 35%|███▌ | 131465/371472 [10:27:01<19:14:30, 3.46it/s] 35%|███▌ | 131466/371472 [10:27:01<18:20:23, 3.64it/s] 35%|███▌ | 131467/371472 [10:27:02<19:08:56, 3.48it/s] 35%|███▌ | 131468/371472 [10:27:02<19:08:07, 3.48it/s] 35%|███▌ | 131469/371472 [10:27:02<19:18:16, 3.45it/s] 35%|███▌ | 131470/371472 [10:27:02<18:26:01, 3.62it/s] 35%|███▌ | 131471/371472 [10:27:03<18:10:13, 3.67it/s] 35%|███▌ | 131472/371472 [10:27:03<17:45:16, 3.75it/s] 35%|███▌ | 131473/371472 [10:27:03<17:36:12, 3.79it/s] 35%|███▌ | 131474/371472 [10:27:04<19:15:23, 3.46it/s] 35%|███▌ | 131475/371472 [10:27:04<18:57:22, 3.52it/s] 35%|███▌ | 131476/371472 [10:27:04<18:23:51, 3.62it/s] 35%|███▌ | 131477/371472 [10:27:04<18:39:17, 3.57it/s] 35%|███▌ | 131478/371472 [10:27:05<17:54:06, 3.72it/s] 35%|███▌ | 131479/371472 [10:27:05<17:33:27, 3.80it/s] 35%|███▌ | 131480/371472 [10:27:05<17:19:24, 3.85it/s] {'loss': 3.0328, 'learning_rate': 6.817643129565385e-07, 'epoch': 5.66} + 35%|███▌ | 131480/371472 [10:27:05<17:19:24, 3.85it/s] 35%|███▌ | 131481/371472 [10:27:05<17:01:21, 3.92it/s] 35%|███▌ | 131482/371472 [10:27:06<17:19:00, 3.85it/s] 35%|███▌ | 131483/371472 [10:27:06<16:53:59, 3.94it/s] 35%|███▌ | 131484/371472 [10:27:06<17:49:54, 3.74it/s] 35%|███▌ | 131485/371472 [10:27:06<18:24:00, 3.62it/s] 35%|███▌ | 131486/371472 [10:27:07<18:15:19, 3.65it/s] 35%|███▌ | 131487/371472 [10:27:07<17:37:34, 3.78it/s] 35%|███▌ | 131488/371472 [10:27:07<17:34:58, 3.79it/s] 35%|███▌ | 131489/371472 [10:27:08<17:34:06, 3.79it/s] 35%|███▌ | 131490/371472 [10:27:08<17:32:48, 3.80it/s] 35%|███▌ | 131491/371472 [10:27:08<17:28:54, 3.81it/s] 35%|███▌ | 131492/371472 [10:27:08<18:06:00, 3.68it/s] 35%|███▌ | 131493/371472 [10:27:09<18:40:39, 3.57it/s] 35%|███▌ | 131494/371472 [10:27:09<18:29:52, 3.60it/s] 35%|███▌ | 131495/371472 [10:27:09<18:57:34, 3.52it/s] 35%|███▌ | 131496/371472 [10:27:09<18:50:25, 3.54it/s] 35%|███▌ | 131497/371472 [10:27:10<18:30:21, 3.60it/s] 35%|███▌ | 131498/371472 [10:27:10<17:51:59, 3.73it/s] 35%|███▌ | 131499/371472 [10:27:10<17:03:38, 3.91it/s] 35%|███▌ | 131500/371472 [10:27:10<16:42:38, 3.99it/s] {'loss': 3.2039, 'learning_rate': 6.817158309810597e-07, 'epoch': 5.66} + 35%|███▌ | 131500/371472 [10:27:10<16:42:38, 3.99it/s] 35%|███▌ | 131501/371472 [10:27:11<17:20:06, 3.85it/s] 35%|███▌ | 131502/371472 [10:27:11<17:04:41, 3.90it/s] 35%|███▌ | 131503/371472 [10:27:11<17:27:23, 3.82it/s] 35%|███▌ | 131504/371472 [10:27:12<17:58:46, 3.71it/s] 35%|███▌ | 131505/371472 [10:27:12<18:40:20, 3.57it/s] 35%|███▌ | 131506/371472 [10:27:12<19:17:51, 3.45it/s] 35%|███▌ | 131507/371472 [10:27:12<18:46:52, 3.55it/s] 35%|███▌ | 131508/371472 [10:27:13<19:46:50, 3.37it/s] 35%|███▌ | 131509/371472 [10:27:13<19:22:01, 3.44it/s] 35%|███▌ | 131510/371472 [10:27:13<18:39:49, 3.57it/s] 35%|███▌ | 131511/371472 [10:27:14<18:01:18, 3.70it/s] 35%|███▌ | 131512/371472 [10:27:14<17:18:58, 3.85it/s] 35%|███▌ | 131513/371472 [10:27:14<17:34:16, 3.79it/s] 35%|███▌ | 131514/371472 [10:27:14<17:59:51, 3.70it/s] 35%|███▌ | 131515/371472 [10:27:15<18:15:30, 3.65it/s] 35%|███▌ | 131516/371472 [10:27:15<17:51:39, 3.73it/s] 35%|███▌ | 131517/371472 [10:27:15<17:57:31, 3.71it/s] 35%|███▌ | 131518/371472 [10:27:15<17:27:20, 3.82it/s] 35%|███▌ | 131519/371472 [10:27:16<17:48:09, 3.74it/s] 35%|███▌ | 131520/371472 [10:27:16<17:30:26, 3.81it/s] {'loss': 3.255, 'learning_rate': 6.816673490055808e-07, 'epoch': 5.66} + 35%|███▌ | 131520/371472 [10:27:16<17:30:26, 3.81it/s] 35%|███▌ | 131521/371472 [10:27:16<18:02:52, 3.69it/s] 35%|███▌ | 131522/371472 [10:27:16<17:49:12, 3.74it/s] 35%|███▌ | 131523/371472 [10:27:17<17:28:14, 3.82it/s] 35%|███▌ | 131524/371472 [10:27:17<17:03:41, 3.91it/s] 35%|███▌ | 131525/371472 [10:27:17<17:12:20, 3.87it/s] 35%|███▌ | 131526/371472 [10:27:17<17:00:44, 3.92it/s] 35%|███▌ | 131527/371472 [10:27:18<18:17:20, 3.64it/s] 35%|███▌ | 131528/371472 [10:27:18<18:26:40, 3.61it/s] 35%|███▌ | 131529/371472 [10:27:18<18:28:36, 3.61it/s] 35%|███▌ | 131530/371472 [10:27:19<17:28:25, 3.81it/s] 35%|███▌ | 131531/371472 [10:27:19<17:09:39, 3.88it/s] 35%|███▌ | 131532/371472 [10:27:19<16:55:52, 3.94it/s] 35%|███▌ | 131533/371472 [10:27:19<16:37:11, 4.01it/s] 35%|███▌ | 131534/371472 [10:27:20<19:04:28, 3.49it/s] 35%|███▌ | 131535/371472 [10:27:20<18:28:13, 3.61it/s] 35%|███▌ | 131536/371472 [10:27:20<18:38:58, 3.57it/s] 35%|███▌ | 131537/371472 [10:27:20<18:26:49, 3.61it/s] 35%|███▌ | 131538/371472 [10:27:21<19:05:06, 3.49it/s] 35%|███▌ | 131539/371472 [10:27:21<18:51:06, 3.54it/s] 35%|███▌ | 131540/371472 [10:27:21<20:10:53, 3.30it/s] {'loss': 3.2335, 'learning_rate': 6.816188670301019e-07, 'epoch': 5.67} + 35%|███▌ | 131540/371472 [10:27:21<20:10:53, 3.30it/s] 35%|███▌ | 131541/371472 [10:27:22<18:52:28, 3.53it/s] 35%|███▌ | 131542/371472 [10:27:22<18:23:17, 3.62it/s] 35%|███▌ | 131543/371472 [10:27:22<18:09:27, 3.67it/s] 35%|███▌ | 131544/371472 [10:27:22<17:39:58, 3.77it/s] 35%|███▌ | 131545/371472 [10:27:23<18:39:10, 3.57it/s] 35%|███▌ | 131546/371472 [10:27:23<18:39:53, 3.57it/s] 35%|███▌ | 131547/371472 [10:27:23<18:43:26, 3.56it/s] 35%|███▌ | 131548/371472 [10:27:24<18:29:41, 3.60it/s] 35%|███▌ | 131549/371472 [10:27:24<19:52:56, 3.35it/s] 35%|███▌ | 131550/371472 [10:27:24<18:48:16, 3.54it/s] 35%|███▌ | 131551/371472 [10:27:24<17:55:06, 3.72it/s] 35%|███▌ | 131552/371472 [10:27:25<17:51:58, 3.73it/s] 35%|███▌ | 131553/371472 [10:27:25<17:51:06, 3.73it/s] 35%|███▌ | 131554/371472 [10:27:25<18:27:45, 3.61it/s] 35%|███▌ | 131555/371472 [10:27:26<18:34:39, 3.59it/s] 35%|███▌ | 131556/371472 [10:27:26<18:15:39, 3.65it/s] 35%|███▌ | 131557/371472 [10:27:26<17:26:42, 3.82it/s] 35%|███▌ | 131558/371472 [10:27:26<18:28:52, 3.61it/s] 35%|███▌ | 131559/371472 [10:27:27<18:02:01, 3.70it/s] 35%|███▌ | 131560/371472 [10:27:27<17:15:40, 3.86it/s] {'loss': 3.0177, 'learning_rate': 6.81570385054623e-07, 'epoch': 5.67} + 35%|███▌ | 131560/371472 [10:27:27<17:15:40, 3.86it/s] 35%|███▌ | 131561/371472 [10:27:27<17:18:25, 3.85it/s] 35%|███▌ | 131562/371472 [10:27:27<16:53:56, 3.94it/s] 35%|███▌ | 131563/371472 [10:27:28<17:06:36, 3.89it/s] 35%|███▌ | 131564/371472 [10:27:28<17:23:33, 3.83it/s] 35%|███▌ | 131565/371472 [10:27:28<17:35:54, 3.79it/s] 35%|███▌ | 131566/371472 [10:27:28<18:01:44, 3.70it/s] 35%|███▌ | 131567/371472 [10:27:29<17:46:56, 3.75it/s] 35%|███▌ | 131568/371472 [10:27:29<18:30:16, 3.60it/s] 35%|███▌ | 131569/371472 [10:27:29<18:50:31, 3.54it/s] 35%|███▌ | 131570/371472 [10:27:30<18:35:23, 3.58it/s] 35%|███▌ | 131571/371472 [10:27:30<18:33:52, 3.59it/s] 35%|███▌ | 131572/371472 [10:27:30<18:37:16, 3.58it/s] 35%|███▌ | 131573/371472 [10:27:30<19:05:52, 3.49it/s] 35%|███▌ | 131574/371472 [10:27:31<18:41:19, 3.57it/s] 35%|███▌ | 131575/371472 [10:27:31<19:34:24, 3.40it/s] 35%|███▌ | 131576/371472 [10:27:31<18:29:14, 3.60it/s] 35%|███▌ | 131577/371472 [10:27:32<18:14:51, 3.65it/s] 35%|███▌ | 131578/371472 [10:27:32<19:28:25, 3.42it/s] 35%|███▌ | 131579/371472 [10:27:32<18:42:32, 3.56it/s] 35%|███▌ | 131580/371472 [10:27:32<18:44:36, 3.56it/s] {'loss': 3.1171, 'learning_rate': 6.815219030791441e-07, 'epoch': 5.67} + 35%|███▌ | 131580/371472 [10:27:32<18:44:36, 3.56it/s] 35%|███▌ | 131581/371472 [10:27:33<18:22:33, 3.63it/s] 35%|███▌ | 131582/371472 [10:27:33<17:57:24, 3.71it/s] 35%|███▌ | 131583/371472 [10:27:33<19:17:37, 3.45it/s] 35%|███▌ | 131584/371472 [10:27:33<18:10:25, 3.67it/s] 35%|███▌ | 131585/371472 [10:27:34<17:45:21, 3.75it/s] 35%|███▌ | 131586/371472 [10:27:34<17:31:55, 3.80it/s] 35%|███▌ | 131587/371472 [10:27:34<18:50:25, 3.54it/s] 35%|███▌ | 131588/371472 [10:27:35<18:30:40, 3.60it/s] 35%|███▌ | 131589/371472 [10:27:35<18:11:16, 3.66it/s] 35%|███▌ | 131590/371472 [10:27:35<17:46:30, 3.75it/s] 35%|███▌ | 131591/371472 [10:27:35<18:39:52, 3.57it/s] 35%|███▌ | 131592/371472 [10:27:36<18:06:54, 3.68it/s] 35%|███▌ | 131593/371472 [10:27:36<17:38:37, 3.78it/s] 35%|███▌ | 131594/371472 [10:27:36<17:54:42, 3.72it/s] 35%|███▌ | 131595/371472 [10:27:36<17:20:11, 3.84it/s] 35%|███▌ | 131596/371472 [10:27:37<16:55:32, 3.94it/s] 35%|███▌ | 131597/371472 [10:27:37<17:38:16, 3.78it/s] 35%|███▌ | 131598/371472 [10:27:37<17:14:38, 3.86it/s] 35%|███▌ | 131599/371472 [10:27:37<17:12:15, 3.87it/s] 35%|███▌ | 131600/371472 [10:27:38<17:36:36, 3.78it/s] {'loss': 3.154, 'learning_rate': 6.814734211036653e-07, 'epoch': 5.67} + 35%|███▌ | 131600/371472 [10:27:38<17:36:36, 3.78it/s] 35%|███▌ | 131601/371472 [10:27:38<17:26:23, 3.82it/s] 35%|███▌ | 131602/371472 [10:27:38<17:23:33, 3.83it/s] 35%|███▌ | 131603/371472 [10:27:38<17:14:21, 3.87it/s] 35%|███▌ | 131604/371472 [10:27:39<20:24:47, 3.26it/s] 35%|███▌ | 131605/371472 [10:27:39<20:16:19, 3.29it/s] 35%|███▌ | 131606/371472 [10:27:40<20:31:09, 3.25it/s] 35%|███▌ | 131607/371472 [10:27:40<19:11:06, 3.47it/s] 35%|███▌ | 131608/371472 [10:27:40<18:46:15, 3.55it/s] 35%|███▌ | 131609/371472 [10:27:40<18:29:55, 3.60it/s] 35%|███▌ | 131610/371472 [10:27:41<19:13:13, 3.47it/s] 35%|███▌ | 131611/371472 [10:27:41<19:15:12, 3.46it/s] 35%|███▌ | 131612/371472 [10:27:41<19:30:02, 3.42it/s] 35%|███▌ | 131613/371472 [10:27:41<19:04:41, 3.49it/s] 35%|███▌ | 131614/371472 [10:27:42<18:34:32, 3.59it/s] 35%|███▌ | 131615/371472 [10:27:42<17:55:32, 3.72it/s] 35%|███▌ | 131616/371472 [10:27:42<18:59:47, 3.51it/s] 35%|███▌ | 131617/371472 [10:27:43<18:24:42, 3.62it/s] 35%|███▌ | 131618/371472 [10:27:43<18:06:46, 3.68it/s] 35%|███▌ | 131619/371472 [10:27:43<17:40:32, 3.77it/s] 35%|███▌ | 131620/371472 [10:27:43<17:47:31, 3.74it/s] {'loss': 2.9814, 'learning_rate': 6.814249391281863e-07, 'epoch': 5.67} + 35%|███▌ | 131620/371472 [10:27:43<17:47:31, 3.74it/s] 35%|███▌ | 131621/371472 [10:27:44<17:59:36, 3.70it/s] 35%|███▌ | 131622/371472 [10:27:44<18:00:38, 3.70it/s] 35%|███▌ | 131623/371472 [10:27:44<18:12:07, 3.66it/s] 35%|███▌ | 131624/371472 [10:27:44<17:53:28, 3.72it/s] 35%|███▌ | 131625/371472 [10:27:45<18:13:57, 3.65it/s] 35%|███▌ | 131626/371472 [10:27:45<18:12:09, 3.66it/s] 35%|███▌ | 131627/371472 [10:27:45<18:36:07, 3.58it/s] 35%|███▌ | 131628/371472 [10:27:46<18:14:46, 3.65it/s] 35%|███▌ | 131629/371472 [10:27:46<18:21:50, 3.63it/s] 35%|███▌ | 131630/371472 [10:27:46<17:43:37, 3.76it/s] 35%|███▌ | 131631/371472 [10:27:46<19:34:33, 3.40it/s] 35%|███▌ | 131632/371472 [10:27:47<20:04:20, 3.32it/s] 35%|███▌ | 131633/371472 [10:27:47<18:45:59, 3.55it/s] 35%|███▌ | 131634/371472 [10:27:47<19:37:48, 3.39it/s] 35%|███▌ | 131635/371472 [10:27:48<19:27:52, 3.42it/s] 35%|███▌ | 131636/371472 [10:27:48<19:54:15, 3.35it/s] 35%|███▌ | 131637/371472 [10:27:48<22:01:24, 3.02it/s] 35%|███▌ | 131638/371472 [10:27:49<20:54:31, 3.19it/s] 35%|███▌ | 131639/371472 [10:27:49<19:43:31, 3.38it/s] 35%|███▌ | 131640/371472 [10:27:49<19:14:04, 3.46it/s] {'loss': 3.0508, 'learning_rate': 6.813764571527074e-07, 'epoch': 5.67} + 35%|███▌ | 131640/371472 [10:27:49<19:14:04, 3.46it/s] 35%|███▌ | 131641/371472 [10:27:49<18:55:21, 3.52it/s] 35%|███▌ | 131642/371472 [10:27:50<18:16:04, 3.65it/s] 35%|███▌ | 131643/371472 [10:27:50<18:49:31, 3.54it/s] 35%|███▌ | 131644/371472 [10:27:50<18:07:41, 3.67it/s] 35%|███▌ | 131645/371472 [10:27:50<18:14:46, 3.65it/s] 35%|███▌ | 131646/371472 [10:27:51<19:21:59, 3.44it/s] 35%|███▌ | 131647/371472 [10:27:51<20:20:42, 3.27it/s] 35%|███▌ | 131648/371472 [10:27:51<19:59:03, 3.33it/s] 35%|███▌ | 131649/371472 [10:27:52<19:35:23, 3.40it/s] 35%|███▌ | 131650/371472 [10:27:52<18:29:33, 3.60it/s] 35%|███▌ | 131651/371472 [10:27:52<18:44:42, 3.55it/s] 35%|███▌ | 131652/371472 [10:27:53<18:34:48, 3.59it/s] 35%|███▌ | 131653/371472 [10:27:53<19:32:45, 3.41it/s] 35%|███▌ | 131654/371472 [10:27:53<19:28:08, 3.42it/s] 35%|███▌ | 131655/371472 [10:27:53<18:54:59, 3.52it/s] 35%|███▌ | 131656/371472 [10:27:54<19:16:09, 3.46it/s] 35%|███▌ | 131657/371472 [10:27:54<18:57:28, 3.51it/s] 35%|███▌ | 131658/371472 [10:27:54<18:08:53, 3.67it/s] 35%|███▌ | 131659/371472 [10:27:55<19:04:06, 3.49it/s] 35%|███▌ | 131660/371472 [10:27:55<18:30:11, 3.60it/s] {'loss': 3.0222, 'learning_rate': 6.813279751772286e-07, 'epoch': 5.67} + 35%|███▌ | 131660/371472 [10:27:55<18:30:11, 3.60it/s] 35%|███▌ | 131661/371472 [10:27:55<20:04:24, 3.32it/s] 35%|███▌ | 131662/371472 [10:27:55<18:54:33, 3.52it/s] 35%|███▌ | 131663/371472 [10:27:56<19:35:50, 3.40it/s] 35%|███▌ | 131664/371472 [10:27:56<19:02:21, 3.50it/s] 35%|███▌ | 131665/371472 [10:27:56<19:16:50, 3.45it/s] 35%|███▌ | 131666/371472 [10:27:57<18:38:35, 3.57it/s] 35%|███▌ | 131667/371472 [10:27:57<17:45:04, 3.75it/s] 35%|███▌ | 131668/371472 [10:27:57<18:33:46, 3.59it/s] 35%|███▌ | 131669/371472 [10:27:57<19:57:04, 3.34it/s] 35%|███▌ | 131670/371472 [10:27:58<20:03:30, 3.32it/s] 35%|███▌ | 131671/371472 [10:27:58<19:13:07, 3.47it/s] 35%|███▌ | 131672/371472 [10:27:58<19:19:42, 3.45it/s] 35%|███▌ | 131673/371472 [10:27:59<18:52:56, 3.53it/s] 35%|███▌ | 131674/371472 [10:27:59<18:36:51, 3.58it/s] 35%|███▌ | 131675/371472 [10:27:59<18:16:25, 3.65it/s] 35%|███▌ | 131676/371472 [10:27:59<19:17:30, 3.45it/s] 35%|███▌ | 131677/371472 [10:28:00<18:51:14, 3.53it/s] 35%|███▌ | 131678/371472 [10:28:00<18:07:09, 3.68it/s] 35%|███▌ | 131679/371472 [10:28:00<18:24:35, 3.62it/s] 35%|███▌ | 131680/371472 [10:28:00<17:44:02, 3.76it/s] {'loss': 3.3455, 'learning_rate': 6.812794932017497e-07, 'epoch': 5.67} + 35%|███▌ | 131680/371472 [10:28:00<17:44:02, 3.76it/s] 35%|███▌ | 131681/371472 [10:28:01<17:16:53, 3.85it/s] 35%|███▌ | 131682/371472 [10:28:01<16:43:59, 3.98it/s] 35%|███▌ | 131683/371472 [10:28:01<16:49:46, 3.96it/s] 35%|███▌ | 131684/371472 [10:28:01<17:26:57, 3.82it/s] 35%|███▌ | 131685/371472 [10:28:02<18:53:13, 3.53it/s] 35%|███▌ | 131686/371472 [10:28:02<18:36:34, 3.58it/s] 35%|███▌ | 131687/371472 [10:28:02<18:48:30, 3.54it/s] 35%|███▌ | 131688/371472 [10:28:03<19:23:31, 3.43it/s] 35%|███▌ | 131689/371472 [10:28:03<19:24:22, 3.43it/s] 35%|███▌ | 131690/371472 [10:28:03<18:25:09, 3.62it/s] 35%|███▌ | 131691/371472 [10:28:03<18:18:05, 3.64it/s] 35%|███▌ | 131692/371472 [10:28:04<17:25:20, 3.82it/s] 35%|███▌ | 131693/371472 [10:28:04<16:43:23, 3.98it/s] 35%|███▌ | 131694/371472 [10:28:04<17:54:08, 3.72it/s] 35%|███▌ | 131695/371472 [10:28:05<18:00:04, 3.70it/s] 35%|███▌ | 131696/371472 [10:28:05<17:53:00, 3.72it/s] 35%|███▌ | 131697/371472 [10:28:05<17:06:44, 3.89it/s] 35%|███▌ | 131698/371472 [10:28:05<18:18:20, 3.64it/s] 35%|███▌ | 131699/371472 [10:28:06<18:23:47, 3.62it/s] 35%|███▌ | 131700/371472 [10:28:06<19:34:20, 3.40it/s] {'loss': 3.099, 'learning_rate': 6.812310112262706e-07, 'epoch': 5.67} + 35%|███▌ | 131700/371472 [10:28:06<19:34:20, 3.40it/s] 35%|███▌ | 131701/371472 [10:28:06<19:29:12, 3.42it/s] 35%|███▌ | 131702/371472 [10:28:06<18:35:07, 3.58it/s] 35%|███▌ | 131703/371472 [10:28:07<18:33:18, 3.59it/s] 35%|███▌ | 131704/371472 [10:28:07<18:33:50, 3.59it/s] 35%|███▌ | 131705/371472 [10:28:07<18:00:55, 3.70it/s] 35%|███▌ | 131706/371472 [10:28:08<18:49:00, 3.54it/s] 35%|███▌ | 131707/371472 [10:28:08<20:07:53, 3.31it/s] 35%|███▌ | 131708/371472 [10:28:08<19:16:47, 3.45it/s] 35%|███▌ | 131709/371472 [10:28:08<18:36:54, 3.58it/s] 35%|███▌ | 131710/371472 [10:28:09<18:36:30, 3.58it/s] 35%|███▌ | 131711/371472 [10:28:09<17:59:32, 3.70it/s] 35%|███▌ | 131712/371472 [10:28:09<18:17:41, 3.64it/s] 35%|███▌ | 131713/371472 [10:28:10<18:02:18, 3.69it/s] 35%|███▌ | 131714/371472 [10:28:10<19:46:27, 3.37it/s] 35%|███▌ | 131715/371472 [10:28:10<19:52:56, 3.35it/s] 35%|███▌ | 131716/371472 [10:28:10<19:23:57, 3.43it/s] 35%|███▌ | 131717/371472 [10:28:11<19:26:34, 3.43it/s] 35%|███▌ | 131718/371472 [10:28:11<19:22:54, 3.44it/s] 35%|███▌ | 131719/371472 [10:28:11<19:04:24, 3.49it/s] 35%|███▌ | 131720/371472 [10:28:12<21:32:01, 3.09it/s] {'loss': 3.1388, 'learning_rate': 6.811825292507918e-07, 'epoch': 5.67} + 35%|███▌ | 131720/371472 [10:28:12<21:32:01, 3.09it/s] 35%|███▌ | 131721/371472 [10:28:12<20:34:04, 3.24it/s] 35%|███▌ | 131722/371472 [10:28:12<20:07:04, 3.31it/s] 35%|███▌ | 131723/371472 [10:28:13<18:46:56, 3.55it/s] 35%|███▌ | 131724/371472 [10:28:13<18:17:55, 3.64it/s] 35%|███▌ | 131725/371472 [10:28:13<18:52:32, 3.53it/s] 35%|███▌ | 131726/371472 [10:28:13<19:36:47, 3.40it/s] 35%|███▌ | 131727/371472 [10:28:14<18:48:41, 3.54it/s] 35%|███▌ | 131728/371472 [10:28:14<17:54:53, 3.72it/s] 35%|███▌ | 131729/371472 [10:28:14<18:06:59, 3.68it/s] 35%|███▌ | 131730/371472 [10:28:15<19:17:18, 3.45it/s] 35%|███▌ | 131731/371472 [10:28:15<19:09:10, 3.48it/s] 35%|███▌ | 131732/371472 [10:28:15<18:48:12, 3.54it/s] 35%|███▌ | 131733/371472 [10:28:15<18:33:22, 3.59it/s] 35%|███▌ | 131734/371472 [10:28:16<18:28:39, 3.60it/s] 35%|███▌ | 131735/371472 [10:28:16<18:36:45, 3.58it/s] 35%|███▌ | 131736/371472 [10:28:16<18:02:26, 3.69it/s] 35%|███▌ | 131737/371472 [10:28:16<18:19:18, 3.63it/s] 35%|███▌ | 131738/371472 [10:28:17<17:46:16, 3.75it/s] 35%|███▌ | 131739/371472 [10:28:17<17:26:36, 3.82it/s] 35%|███▌ | 131740/371472 [10:28:17<17:26:40, 3.82it/s] {'loss': 3.2224, 'learning_rate': 6.81134047275313e-07, 'epoch': 5.67} + 35%|███▌ | 131740/371472 [10:28:17<17:26:40, 3.82it/s] 35%|███▌ | 131741/371472 [10:28:17<18:09:17, 3.67it/s] 35%|███▌ | 131742/371472 [10:28:18<17:46:08, 3.75it/s] 35%|███▌ | 131743/371472 [10:28:18<17:53:20, 3.72it/s] 35%|███▌ | 131744/371472 [10:28:18<18:07:13, 3.67it/s] 35%|███▌ | 131745/371472 [10:28:19<18:33:13, 3.59it/s] 35%|███▌ | 131746/371472 [10:28:19<18:48:55, 3.54it/s] 35%|███▌ | 131747/371472 [10:28:19<18:06:45, 3.68it/s] 35%|███▌ | 131748/371472 [10:28:19<18:02:04, 3.69it/s] 35%|███▌ | 131749/371472 [10:28:20<19:28:22, 3.42it/s] 35%|███▌ | 131750/371472 [10:28:20<19:23:17, 3.43it/s] 35%|███▌ | 131751/371472 [10:28:20<19:03:20, 3.49it/s] 35%|███▌ | 131752/371472 [10:28:21<18:24:03, 3.62it/s] 35%|███▌ | 131753/371472 [10:28:21<17:53:40, 3.72it/s] 35%|███▌ | 131754/371472 [10:28:21<18:07:07, 3.68it/s] 35%|███▌ | 131755/371472 [10:28:21<18:55:22, 3.52it/s] 35%|███▌ | 131756/371472 [10:28:22<18:25:11, 3.62it/s] 35%|███▌ | 131757/371472 [10:28:22<19:36:08, 3.40it/s] 35%|███▌ | 131758/371472 [10:28:22<21:44:55, 3.06it/s] 35%|███▌ | 131759/371472 [10:28:23<21:49:44, 3.05it/s] 35%|███▌ | 131760/371472 [10:28:23<20:36:23, 3.23it/s] {'loss': 3.2286, 'learning_rate': 6.810855652998341e-07, 'epoch': 5.68} + 35%|███▌ | 131760/371472 [10:28:23<20:36:23, 3.23it/s] 35%|███▌ | 131761/371472 [10:28:23<19:35:48, 3.40it/s] 35%|███▌ | 131762/371472 [10:28:23<18:22:33, 3.62it/s] 35%|███▌ | 131763/371472 [10:28:24<18:50:07, 3.54it/s] 35%|███▌ | 131764/371472 [10:28:24<18:43:26, 3.56it/s] 35%|███▌ | 131765/371472 [10:28:24<18:32:05, 3.59it/s] 35%|███▌ | 131766/371472 [10:28:25<18:56:13, 3.52it/s] 35%|███▌ | 131767/371472 [10:28:25<18:47:47, 3.54it/s] 35%|███▌ | 131768/371472 [10:28:25<17:51:04, 3.73it/s] 35%|███▌ | 131769/371472 [10:28:25<17:32:37, 3.80it/s] 35%|███▌ | 131770/371472 [10:28:26<17:48:02, 3.74it/s] 35%|███▌ | 131771/371472 [10:28:26<17:42:00, 3.76it/s] 35%|███▌ | 131772/371472 [10:28:26<18:08:02, 3.67it/s] 35%|███▌ | 131773/371472 [10:28:26<18:07:13, 3.67it/s] 35%|███▌ | 131774/371472 [10:28:27<17:44:48, 3.75it/s] 35%|███▌ | 131775/371472 [10:28:27<18:45:00, 3.55it/s] 35%|███▌ | 131776/371472 [10:28:27<18:18:53, 3.64it/s] 35%|███▌ | 131777/371472 [10:28:28<18:03:23, 3.69it/s] 35%|███▌ | 131778/371472 [10:28:28<17:55:15, 3.72it/s] 35%|███▌ | 131779/371472 [10:28:28<18:08:51, 3.67it/s] 35%|███▌ | 131780/371472 [10:28:28<18:46:11, 3.55it/s] {'loss': 3.2585, 'learning_rate': 6.810370833243551e-07, 'epoch': 5.68} + 35%|███▌ | 131780/371472 [10:28:28<18:46:11, 3.55it/s] 35%|███▌ | 131781/371472 [10:28:29<19:04:53, 3.49it/s] 35%|███▌ | 131782/371472 [10:28:29<18:22:13, 3.62it/s] 35%|███▌ | 131783/371472 [10:28:29<21:12:05, 3.14it/s] 35%|███▌ | 131784/371472 [10:28:30<21:08:41, 3.15it/s] 35%|███▌ | 131785/371472 [10:28:30<20:14:40, 3.29it/s] 35%|███▌ | 131786/371472 [10:28:30<19:24:31, 3.43it/s] 35%|███▌ | 131787/371472 [10:28:31<19:36:15, 3.40it/s] 35%|███▌ | 131788/371472 [10:28:31<19:13:23, 3.46it/s] 35%|███▌ | 131789/371472 [10:28:31<18:27:28, 3.61it/s] 35%|███▌ | 131790/371472 [10:28:31<18:41:35, 3.56it/s] 35%|███▌ | 131791/371472 [10:28:32<18:08:45, 3.67it/s] 35%|███▌ | 131792/371472 [10:28:32<18:39:47, 3.57it/s] 35%|███▌ | 131793/371472 [10:28:32<17:54:23, 3.72it/s] 35%|███▌ | 131794/371472 [10:28:32<19:00:18, 3.50it/s] 35%|███▌ | 131795/371472 [10:28:33<19:03:02, 3.49it/s] 35%|███▌ | 131796/371472 [10:28:33<19:19:04, 3.45it/s] 35%|███▌ | 131797/371472 [10:28:33<19:08:13, 3.48it/s] 35%|███▌ | 131798/371472 [10:28:34<18:49:54, 3.54it/s] 35%|███▌ | 131799/371472 [10:28:34<20:12:45, 3.29it/s] 35%|███▌ | 131800/371472 [10:28:34<19:09:55, 3.47it/s] {'loss': 3.4365, 'learning_rate': 6.809886013488763e-07, 'epoch': 5.68} + 35%|███▌ | 131800/371472 [10:28:34<19:09:55, 3.47it/s] 35%|███▌ | 131801/371472 [10:28:34<18:40:55, 3.56it/s] 35%|███▌ | 131802/371472 [10:28:35<18:11:14, 3.66it/s] 35%|███▌ | 131803/371472 [10:28:35<17:51:02, 3.73it/s] 35%|███▌ | 131804/371472 [10:28:35<18:20:27, 3.63it/s] 35%|███▌ | 131805/371472 [10:28:36<19:41:50, 3.38it/s] 35%|███▌ | 131806/371472 [10:28:36<19:00:15, 3.50it/s] 35%|███▌ | 131807/371472 [10:28:36<18:17:43, 3.64it/s] 35%|███▌ | 131808/371472 [10:28:36<18:49:23, 3.54it/s] 35%|███▌ | 131809/371472 [10:28:37<18:05:25, 3.68it/s] 35%|███▌ | 131810/371472 [10:28:37<18:33:35, 3.59it/s] 35%|███▌ | 131811/371472 [10:28:37<18:01:30, 3.69it/s] 35%|███▌ | 131812/371472 [10:28:38<18:49:50, 3.54it/s] 35%|███▌ | 131813/371472 [10:28:38<19:20:01, 3.44it/s] 35%|███▌ | 131814/371472 [10:28:38<18:43:52, 3.55it/s] 35%|███▌ | 131815/371472 [10:28:38<19:04:19, 3.49it/s] 35%|███▌ | 131816/371472 [10:28:39<18:19:39, 3.63it/s] 35%|███▌ | 131817/371472 [10:28:39<18:57:24, 3.51it/s] 35%|███▌ | 131818/371472 [10:28:39<19:02:39, 3.50it/s] 35%|███▌ | 131819/371472 [10:28:40<18:41:19, 3.56it/s] 35%|███▌ | 131820/371472 [10:28:40<19:33:42, 3.40it/s] {'loss': 3.1841, 'learning_rate': 6.809401193733974e-07, 'epoch': 5.68} + 35%|███▌ | 131820/371472 [10:28:40<19:33:42, 3.40it/s] 35%|███▌ | 131821/371472 [10:28:40<18:54:13, 3.52it/s] 35%|███▌ | 131822/371472 [10:28:40<18:35:28, 3.58it/s] 35%|███▌ | 131823/371472 [10:28:41<18:23:25, 3.62it/s] 35%|███▌ | 131824/371472 [10:28:41<19:01:50, 3.50it/s] 35%|███▌ | 131825/371472 [10:28:41<18:17:43, 3.64it/s] 35%|███▌ | 131826/371472 [10:28:42<18:53:11, 3.52it/s] 35%|███▌ | 131827/371472 [10:28:42<17:42:40, 3.76it/s] 35%|███▌ | 131828/371472 [10:28:42<17:18:59, 3.84it/s] 35%|███▌ | 131829/371472 [10:28:42<18:52:15, 3.53it/s] 35%|███▌ | 131830/371472 [10:28:43<18:30:40, 3.60it/s] 35%|███▌ | 131831/371472 [10:28:43<17:41:00, 3.76it/s] 35%|███▌ | 131832/371472 [10:28:43<18:15:16, 3.65it/s] 35%|███▌ | 131833/371472 [10:28:43<18:41:36, 3.56it/s] 35%|███▌ | 131834/371472 [10:28:44<18:45:52, 3.55it/s] 35%|███▌ | 131835/371472 [10:28:44<18:50:26, 3.53it/s] 35%|███▌ | 131836/371472 [10:28:44<19:06:11, 3.48it/s] 35%|███▌ | 131837/371472 [10:28:45<19:25:09, 3.43it/s] 35%|███▌ | 131838/371472 [10:28:45<20:41:50, 3.22it/s] 35%|███▌ | 131839/371472 [10:28:45<20:41:39, 3.22it/s] 35%|███▌ | 131840/371472 [10:28:45<19:13:40, 3.46it/s] {'loss': 3.233, 'learning_rate': 6.808916373979185e-07, 'epoch': 5.68} + 35%|███▌ | 131840/371472 [10:28:45<19:13:40, 3.46it/s] 35%|███▌ | 131841/371472 [10:28:46<18:45:41, 3.55it/s] 35%|███▌ | 131842/371472 [10:28:46<18:00:51, 3.70it/s] 35%|███▌ | 131843/371472 [10:28:46<19:23:38, 3.43it/s] 35%|███▌ | 131844/371472 [10:28:47<20:44:22, 3.21it/s] 35%|███▌ | 131845/371472 [10:28:47<19:30:04, 3.41it/s] 35%|███▌ | 131846/371472 [10:28:47<19:10:26, 3.47it/s] 35%|███▌ | 131847/371472 [10:28:47<18:54:07, 3.52it/s] 35%|███▌ | 131848/371472 [10:28:48<18:37:24, 3.57it/s] 35%|███▌ | 131849/371472 [10:28:48<18:28:10, 3.60it/s] 35%|███▌ | 131850/371472 [10:28:48<19:06:46, 3.48it/s] 35%|███▌ | 131851/371472 [10:28:49<19:50:56, 3.35it/s] 35%|███▌ | 131852/371472 [10:28:49<19:41:42, 3.38it/s] 35%|███▌ | 131853/371472 [10:28:49<18:50:06, 3.53it/s] 35%|███▌ | 131854/371472 [10:28:50<19:00:08, 3.50it/s] 35%|███▌ | 131855/371472 [10:28:50<18:44:57, 3.55it/s] 35%|███▌ | 131856/371472 [10:28:50<19:15:07, 3.46it/s] 35%|███▌ | 131857/371472 [10:28:50<19:14:59, 3.46it/s] 35%|███▌ | 131858/371472 [10:28:51<20:35:19, 3.23it/s] 35%|███▌ | 131859/371472 [10:28:51<19:22:28, 3.44it/s] 35%|███▌ | 131860/371472 [10:28:51<18:59:29, 3.50it/s] {'loss': 3.2861, 'learning_rate': 6.808431554224395e-07, 'epoch': 5.68} + 35%|███▌ | 131860/371472 [10:28:51<18:59:29, 3.50it/s] 35%|███▌ | 131861/371472 [10:28:52<19:15:50, 3.46it/s] 35%|███▌ | 131862/371472 [10:28:52<19:19:38, 3.44it/s] 35%|███▌ | 131863/371472 [10:28:52<18:39:57, 3.57it/s] 35%|███▌ | 131864/371472 [10:28:52<19:22:56, 3.43it/s] 35%|███▌ | 131865/371472 [10:28:53<18:22:36, 3.62it/s] 35%|███▌ | 131866/371472 [10:28:53<18:15:22, 3.65it/s] 35%|███▌ | 131867/371472 [10:28:53<18:32:13, 3.59it/s] 35%|███▌ | 131868/371472 [10:28:53<18:15:42, 3.64it/s] 35%|███▌ | 131869/371472 [10:28:54<18:15:04, 3.65it/s] 35%|███▌ | 131870/371472 [10:28:54<17:53:49, 3.72it/s] 35%|███▌ | 131871/371472 [10:28:54<18:08:07, 3.67it/s] 35%|███▌ | 131872/371472 [10:28:55<18:25:52, 3.61it/s] 36%|███▌ | 131873/371472 [10:28:55<18:07:26, 3.67it/s] 36%|███▌ | 131874/371472 [10:28:55<17:53:53, 3.72it/s] 36%|███▌ | 131875/371472 [10:28:55<18:51:27, 3.53it/s] 36%|███▌ | 131876/371472 [10:28:56<19:33:20, 3.40it/s] 36%|███▌ | 131877/371472 [10:28:56<18:43:40, 3.55it/s] 36%|███▌ | 131878/371472 [10:28:56<18:15:21, 3.65it/s] 36%|███▌ | 131879/371472 [10:28:57<18:20:18, 3.63it/s] 36%|███▌ | 131880/371472 [10:28:57<18:10:40, 3.66it/s] {'loss': 3.2726, 'learning_rate': 6.807946734469607e-07, 'epoch': 5.68} + 36%|███▌ | 131880/371472 [10:28:57<18:10:40, 3.66it/s] 36%|███▌ | 131881/371472 [10:28:57<19:05:36, 3.49it/s] 36%|███▌ | 131882/371472 [10:28:57<18:13:07, 3.65it/s] 36%|███▌ | 131883/371472 [10:28:58<18:26:57, 3.61it/s] 36%|███▌ | 131884/371472 [10:28:58<19:13:48, 3.46it/s] 36%|███▌ | 131885/371472 [10:28:58<19:41:34, 3.38it/s] 36%|███▌ | 131886/371472 [10:28:59<19:14:59, 3.46it/s] 36%|███▌ | 131887/371472 [10:28:59<23:33:23, 2.83it/s] 36%|███▌ | 131888/371472 [10:28:59<22:18:44, 2.98it/s] 36%|███▌ | 131889/371472 [10:29:00<21:04:52, 3.16it/s] 36%|███▌ | 131890/371472 [10:29:00<19:57:44, 3.33it/s] 36%|███▌ | 131891/371472 [10:29:00<19:28:48, 3.42it/s] 36%|███▌ | 131892/371472 [10:29:00<19:25:08, 3.43it/s] 36%|███▌ | 131893/371472 [10:29:01<19:02:18, 3.50it/s] 36%|███▌ | 131894/371472 [10:29:01<18:58:28, 3.51it/s] 36%|███▌ | 131895/371472 [10:29:01<18:54:18, 3.52it/s] 36%|███▌ | 131896/371472 [10:29:02<18:32:45, 3.59it/s] 36%|███▌ | 131897/371472 [10:29:02<19:18:48, 3.45it/s] 36%|███▌ | 131898/371472 [10:29:02<20:59:56, 3.17it/s] 36%|███▌ | 131899/371472 [10:29:03<20:13:22, 3.29it/s] 36%|███▌ | 131900/371472 [10:29:03<20:07:02, 3.31it/s] {'loss': 3.0955, 'learning_rate': 6.807461914714819e-07, 'epoch': 5.68} + 36%|███▌ | 131900/371472 [10:29:03<20:07:02, 3.31it/s] 36%|███▌ | 131901/371472 [10:29:03<20:05:38, 3.31it/s] 36%|███▌ | 131902/371472 [10:29:03<20:01:53, 3.32it/s] 36%|███▌ | 131903/371472 [10:29:04<18:54:10, 3.52it/s] 36%|███▌ | 131904/371472 [10:29:04<18:57:14, 3.51it/s] 36%|███▌ | 131905/371472 [10:29:04<18:42:31, 3.56it/s] 36%|███▌ | 131906/371472 [10:29:05<19:58:35, 3.33it/s] 36%|███▌ | 131907/371472 [10:29:05<20:40:43, 3.22it/s] 36%|███▌ | 131908/371472 [10:29:05<21:44:14, 3.06it/s] 36%|███▌ | 131909/371472 [10:29:06<21:01:02, 3.17it/s] 36%|███▌ | 131910/371472 [10:29:06<20:08:06, 3.30it/s] 36%|███▌ | 131911/371472 [10:29:06<19:20:02, 3.44it/s] 36%|███▌ | 131912/371472 [10:29:06<18:57:34, 3.51it/s] 36%|███▌ | 131913/371472 [10:29:07<18:54:42, 3.52it/s] 36%|███▌ | 131914/371472 [10:29:07<20:06:18, 3.31it/s] 36%|███▌ | 131915/371472 [10:29:07<19:04:48, 3.49it/s] 36%|███▌ | 131916/371472 [10:29:07<18:38:02, 3.57it/s] 36%|███▌ | 131917/371472 [10:29:08<19:24:15, 3.43it/s] 36%|███▌ | 131918/371472 [10:29:08<18:36:46, 3.58it/s] 36%|███▌ | 131919/371472 [10:29:08<18:39:34, 3.57it/s] 36%|███▌ | 131920/371472 [10:29:09<17:45:46, 3.75it/s] {'loss': 3.2073, 'learning_rate': 6.806977094960029e-07, 'epoch': 5.68} + 36%|███▌ | 131920/371472 [10:29:09<17:45:46, 3.75it/s] 36%|███▌ | 131921/371472 [10:29:09<17:51:22, 3.73it/s] 36%|███▌ | 131922/371472 [10:29:09<18:12:58, 3.65it/s] 36%|███▌ | 131923/371472 [10:29:09<18:47:31, 3.54it/s] 36%|███▌ | 131924/371472 [10:29:10<19:02:33, 3.49it/s] 36%|███▌ | 131925/371472 [10:29:10<17:59:41, 3.70it/s] 36%|███▌ | 131926/371472 [10:29:10<17:36:17, 3.78it/s] 36%|███▌ | 131927/371472 [10:29:10<17:35:38, 3.78it/s] 36%|███▌ | 131928/371472 [10:29:11<18:01:29, 3.69it/s] 36%|███▌ | 131929/371472 [10:29:11<17:59:09, 3.70it/s] 36%|███▌ | 131930/371472 [10:29:11<21:15:11, 3.13it/s] 36%|███▌ | 131931/371472 [10:29:12<20:14:07, 3.29it/s] 36%|███▌ | 131932/371472 [10:29:12<19:09:58, 3.47it/s] 36%|███▌ | 131933/371472 [10:29:12<18:57:34, 3.51it/s] 36%|███▌ | 131934/371472 [10:29:13<18:12:59, 3.65it/s] 36%|███▌ | 131935/371472 [10:29:13<18:17:53, 3.64it/s] 36%|███▌ | 131936/371472 [10:29:13<19:29:24, 3.41it/s] 36%|███▌ | 131937/371472 [10:29:13<18:57:38, 3.51it/s] 36%|███▌ | 131938/371472 [10:29:14<19:00:48, 3.50it/s] 36%|███▌ | 131939/371472 [10:29:14<18:02:45, 3.69it/s] 36%|███▌ | 131940/371472 [10:29:14<18:11:06, 3.66it/s] {'loss': 3.3188, 'learning_rate': 6.80649227520524e-07, 'epoch': 5.68} + 36%|███▌ | 131940/371472 [10:29:14<18:11:06, 3.66it/s] 36%|███▌ | 131941/371472 [10:29:14<18:05:09, 3.68it/s] 36%|███▌ | 131942/371472 [10:29:15<19:14:42, 3.46it/s] 36%|███▌ | 131943/371472 [10:29:15<19:15:21, 3.46it/s] 36%|███▌ | 131944/371472 [10:29:15<19:51:09, 3.35it/s] 36%|███▌ | 131945/371472 [10:29:16<18:48:46, 3.54it/s] 36%|███▌ | 131946/371472 [10:29:16<18:41:07, 3.56it/s] 36%|███▌ | 131947/371472 [10:29:16<20:08:29, 3.30it/s] 36%|███▌ | 131948/371472 [10:29:17<19:19:07, 3.44it/s] 36%|███▌ | 131949/371472 [10:29:17<19:01:42, 3.50it/s] 36%|███▌ | 131950/371472 [10:29:17<18:22:16, 3.62it/s] 36%|███▌ | 131951/371472 [10:29:17<19:01:00, 3.50it/s] 36%|███▌ | 131952/371472 [10:29:18<18:54:46, 3.52it/s] 36%|███▌ | 131953/371472 [10:29:18<19:25:06, 3.43it/s] 36%|███▌ | 131954/371472 [10:29:18<18:42:17, 3.56it/s] 36%|███▌ | 131955/371472 [10:29:19<19:36:28, 3.39it/s] 36%|███▌ | 131956/371472 [10:29:19<18:48:59, 3.54it/s] 36%|███▌ | 131957/371472 [10:29:19<19:23:09, 3.43it/s] 36%|███▌ | 131958/371472 [10:29:19<18:57:39, 3.51it/s] 36%|███▌ | 131959/371472 [10:29:20<18:28:31, 3.60it/s] 36%|███▌ | 131960/371472 [10:29:20<18:53:51, 3.52it/s] {'loss': 3.0318, 'learning_rate': 6.806007455450451e-07, 'epoch': 5.68} + 36%|███▌ | 131960/371472 [10:29:20<18:53:51, 3.52it/s] 36%|███▌ | 131961/371472 [10:29:20<18:30:22, 3.60it/s] 36%|███▌ | 131962/371472 [10:29:20<18:24:53, 3.61it/s] 36%|███▌ | 131963/371472 [10:29:21<18:32:42, 3.59it/s] 36%|███▌ | 131964/371472 [10:29:21<17:45:56, 3.74it/s] 36%|███▌ | 131965/371472 [10:29:21<17:49:57, 3.73it/s] 36%|███▌ | 131966/371472 [10:29:22<17:29:22, 3.80it/s] 36%|███▌ | 131967/371472 [10:29:22<17:02:15, 3.90it/s] 36%|███▌ | 131968/371472 [10:29:22<17:21:35, 3.83it/s] 36%|███▌ | 131969/371472 [10:29:22<18:15:44, 3.64it/s] 36%|███▌ | 131970/371472 [10:29:23<18:10:08, 3.66it/s] 36%|███▌ | 131971/371472 [10:29:23<17:34:48, 3.78it/s] 36%|███▌ | 131972/371472 [10:29:23<17:28:01, 3.81it/s] 36%|███▌ | 131973/371472 [10:29:23<17:19:32, 3.84it/s] 36%|███▌ | 131974/371472 [10:29:24<19:09:52, 3.47it/s] 36%|███▌ | 131975/371472 [10:29:24<18:53:43, 3.52it/s] 36%|███▌ | 131976/371472 [10:29:24<18:41:57, 3.56it/s] 36%|███▌ | 131977/371472 [10:29:25<18:49:56, 3.53it/s] 36%|███▌ | 131978/371472 [10:29:25<20:15:10, 3.28it/s] 36%|███▌ | 131979/371472 [10:29:25<21:45:14, 3.06it/s] 36%|███▌ | 131980/371472 [10:29:26<20:23:12, 3.26it/s] {'loss': 3.1203, 'learning_rate': 6.805522635695662e-07, 'epoch': 5.68} + 36%|███▌ | 131980/371472 [10:29:26<20:23:12, 3.26it/s] 36%|███▌ | 131981/371472 [10:29:26<19:15:44, 3.45it/s] 36%|███▌ | 131982/371472 [10:29:26<18:55:31, 3.52it/s] 36%|███▌ | 131983/371472 [10:29:26<19:15:46, 3.45it/s] 36%|███▌ | 131984/371472 [10:29:27<20:08:23, 3.30it/s] 36%|███▌ | 131985/371472 [10:29:27<19:08:34, 3.48it/s] 36%|███▌ | 131986/371472 [10:29:27<18:33:57, 3.58it/s] 36%|███▌ | 131987/371472 [10:29:28<18:34:15, 3.58it/s] 36%|███▌ | 131988/371472 [10:29:28<17:46:52, 3.74it/s] 36%|███▌ | 131989/371472 [10:29:28<17:45:30, 3.75it/s] 36%|███▌ | 131990/371472 [10:29:28<17:49:47, 3.73it/s] 36%|███▌ | 131991/371472 [10:29:29<17:25:17, 3.82it/s] 36%|███▌ | 131992/371472 [10:29:29<17:24:21, 3.82it/s] 36%|███▌ | 131993/371472 [10:29:29<17:22:11, 3.83it/s] 36%|███▌ | 131994/371472 [10:29:29<17:20:25, 3.84it/s] 36%|███▌ | 131995/371472 [10:29:30<17:07:33, 3.88it/s] 36%|███▌ | 131996/371472 [10:29:30<17:25:49, 3.82it/s] 36%|███▌ | 131997/371472 [10:29:30<17:27:23, 3.81it/s] 36%|███▌ | 131998/371472 [10:29:30<19:42:02, 3.38it/s] 36%|███▌ | 131999/371472 [10:29:31<19:13:07, 3.46it/s] 36%|███▌ | 132000/371472 [10:29:31<19:19:54, 3.44it/s] {'loss': 3.1966, 'learning_rate': 6.805037815940872e-07, 'epoch': 5.69} + 36%|███▌ | 132000/371472 [10:29:31<19:19:54, 3.44it/s] 36%|███▌ | 132001/371472 [10:29:31<18:37:25, 3.57it/s] 36%|███▌ | 132002/371472 [10:29:32<18:35:14, 3.58it/s] 36%|███▌ | 132003/371472 [10:29:32<18:28:23, 3.60it/s] 36%|███▌ | 132004/371472 [10:29:32<18:24:09, 3.61it/s] 36%|███▌ | 132005/371472 [10:29:32<18:39:31, 3.57it/s] 36%|███▌ | 132006/371472 [10:29:33<18:09:37, 3.66it/s] 36%|███▌ | 132007/371472 [10:29:33<18:21:47, 3.62it/s] 36%|███▌ | 132008/371472 [10:29:33<17:49:24, 3.73it/s] 36%|███▌ | 132009/371472 [10:29:33<18:01:23, 3.69it/s] 36%|███▌ | 132010/371472 [10:29:34<18:30:54, 3.59it/s] 36%|███▌ | 132011/371472 [10:29:34<18:05:45, 3.68it/s] 36%|███▌ | 132012/371472 [10:29:34<17:32:30, 3.79it/s] 36%|███▌ | 132013/371472 [10:29:35<17:09:49, 3.88it/s] 36%|███▌ | 132014/371472 [10:29:35<17:37:47, 3.77it/s] 36%|███▌ | 132015/371472 [10:29:35<18:24:30, 3.61it/s] 36%|███▌ | 132016/371472 [10:29:35<20:26:57, 3.25it/s] 36%|███▌ | 132017/371472 [10:29:36<19:09:46, 3.47it/s] 36%|███▌ | 132018/371472 [10:29:36<20:30:10, 3.24it/s] 36%|███▌ | 132019/371472 [10:29:36<18:59:53, 3.50it/s] 36%|███▌ | 132020/371472 [10:29:37<20:28:30, 3.25it/s] {'loss': 3.1344, 'learning_rate': 6.804552996186083e-07, 'epoch': 5.69} + 36%|███▌ | 132020/371472 [10:29:37<20:28:30, 3.25it/s] 36%|███▌ | 132021/371472 [10:29:37<20:03:15, 3.32it/s] 36%|███▌ | 132022/371472 [10:29:37<19:24:52, 3.43it/s] 36%|███▌ | 132023/371472 [10:29:38<18:57:33, 3.51it/s] 36%|███▌ | 132024/371472 [10:29:38<18:26:49, 3.61it/s] 36%|███▌ | 132025/371472 [10:29:38<18:25:36, 3.61it/s] 36%|███▌ | 132026/371472 [10:29:38<18:32:42, 3.59it/s] 36%|███▌ | 132027/371472 [10:29:39<19:54:21, 3.34it/s] 36%|███▌ | 132028/371472 [10:29:39<18:58:20, 3.51it/s] 36%|███▌ | 132029/371472 [10:29:39<18:24:39, 3.61it/s] 36%|███▌ | 132030/371472 [10:29:39<19:09:38, 3.47it/s] 36%|███▌ | 132031/371472 [10:29:40<19:29:47, 3.41it/s] 36%|███▌ | 132032/371472 [10:29:40<18:59:40, 3.50it/s] 36%|███▌ | 132033/371472 [10:29:40<18:42:25, 3.56it/s] 36%|███▌ | 132034/371472 [10:29:41<17:55:24, 3.71it/s] 36%|███▌ | 132035/371472 [10:29:41<17:56:53, 3.71it/s] 36%|███▌ | 132036/371472 [10:29:41<17:36:59, 3.78it/s] 36%|███▌ | 132037/371472 [10:29:41<17:57:10, 3.70it/s] 36%|███▌ | 132038/371472 [10:29:42<17:53:57, 3.72it/s] 36%|███▌ | 132039/371472 [10:29:42<17:53:33, 3.72it/s] 36%|███▌ | 132040/371472 [10:29:42<17:49:49, 3.73it/s] {'loss': 3.2833, 'learning_rate': 6.804068176431296e-07, 'epoch': 5.69} + 36%|███▌ | 132040/371472 [10:29:42<17:49:49, 3.73it/s] 36%|███▌ | 132041/371472 [10:29:42<17:34:04, 3.79it/s] 36%|███▌ | 132042/371472 [10:29:43<17:58:08, 3.70it/s] 36%|███▌ | 132043/371472 [10:29:43<18:55:46, 3.51it/s] 36%|███▌ | 132044/371472 [10:29:43<18:41:47, 3.56it/s] 36%|███▌ | 132045/371472 [10:29:44<17:55:15, 3.71it/s] 36%|███▌ | 132046/371472 [10:29:44<17:57:18, 3.70it/s] 36%|███▌ | 132047/371472 [10:29:44<17:30:40, 3.80it/s] 36%|███▌ | 132048/371472 [10:29:44<18:15:19, 3.64it/s] 36%|███▌ | 132049/371472 [10:29:45<18:04:00, 3.68it/s] 36%|███▌ | 132050/371472 [10:29:45<17:29:35, 3.80it/s] 36%|███▌ | 132051/371472 [10:29:45<18:08:13, 3.67it/s] 36%|███▌ | 132052/371472 [10:29:45<17:55:02, 3.71it/s] 36%|███▌ | 132053/371472 [10:29:46<17:53:57, 3.72it/s] 36%|███▌ | 132054/371472 [10:29:46<17:42:08, 3.76it/s] 36%|███▌ | 132055/371472 [10:29:46<18:29:34, 3.60it/s] 36%|███▌ | 132056/371472 [10:29:47<18:30:20, 3.59it/s] 36%|███▌ | 132057/371472 [10:29:47<19:11:05, 3.47it/s] 36%|███▌ | 132058/371472 [10:29:47<19:47:57, 3.36it/s] 36%|███▌ | 132059/371472 [10:29:47<19:00:15, 3.50it/s] 36%|███▌ | 132060/371472 [10:29:48<18:19:31, 3.63it/s] {'loss': 3.1791, 'learning_rate': 6.803583356676507e-07, 'epoch': 5.69} + 36%|███▌ | 132060/371472 [10:29:48<18:19:31, 3.63it/s] 36%|███▌ | 132061/371472 [10:29:48<17:52:53, 3.72it/s] 36%|███▌ | 132062/371472 [10:29:48<18:16:20, 3.64it/s] 36%|███▌ | 132063/371472 [10:29:49<18:08:46, 3.66it/s] 36%|███▌ | 132064/371472 [10:29:49<17:38:21, 3.77it/s] 36%|███▌ | 132065/371472 [10:29:49<18:16:41, 3.64it/s] 36%|███▌ | 132066/371472 [10:29:49<18:09:04, 3.66it/s] 36%|███▌ | 132067/371472 [10:29:50<18:03:40, 3.68it/s] 36%|███▌ | 132068/371472 [10:29:50<19:28:45, 3.41it/s] 36%|███▌ | 132069/371472 [10:29:50<19:13:26, 3.46it/s] 36%|███▌ | 132070/371472 [10:29:50<18:44:20, 3.55it/s] 36%|███▌ | 132071/371472 [10:29:51<18:32:52, 3.59it/s] 36%|███▌ | 132072/371472 [10:29:51<18:24:28, 3.61it/s] 36%|███▌ | 132073/371472 [10:29:51<18:45:44, 3.54it/s] 36%|███▌ | 132074/371472 [10:29:52<18:30:56, 3.59it/s] 36%|███▌ | 132075/371472 [10:29:52<18:01:15, 3.69it/s] 36%|███▌ | 132076/371472 [10:29:52<17:29:53, 3.80it/s] 36%|███▌ | 132077/371472 [10:29:52<17:23:51, 3.82it/s] 36%|███▌ | 132078/371472 [10:29:53<17:16:03, 3.85it/s] 36%|███▌ | 132079/371472 [10:29:53<17:31:02, 3.80it/s] 36%|███▌ | 132080/371472 [10:29:53<18:29:05, 3.60it/s] {'loss': 3.1291, 'learning_rate': 6.803098536921716e-07, 'epoch': 5.69} + 36%|███▌ | 132080/371472 [10:29:53<18:29:05, 3.60it/s] 36%|███▌ | 132081/371472 [10:29:53<19:02:16, 3.49it/s] 36%|███▌ | 132082/371472 [10:29:54<18:20:56, 3.62it/s] 36%|███▌ | 132083/371472 [10:29:54<17:47:51, 3.74it/s] 36%|███▌ | 132084/371472 [10:29:54<18:02:45, 3.68it/s] 36%|███▌ | 132085/371472 [10:29:55<17:34:33, 3.78it/s] 36%|███▌ | 132086/371472 [10:29:55<17:35:31, 3.78it/s] 36%|███▌ | 132087/371472 [10:29:55<17:40:12, 3.76it/s] 36%|███▌ | 132088/371472 [10:29:55<17:20:34, 3.83it/s] 36%|███▌ | 132089/371472 [10:29:56<17:05:43, 3.89it/s] 36%|███▌ | 132090/371472 [10:29:56<17:02:11, 3.90it/s] 36%|███▌ | 132091/371472 [10:29:56<17:16:51, 3.85it/s] 36%|███▌ | 132092/371472 [10:29:56<18:04:59, 3.68it/s] 36%|███▌ | 132093/371472 [10:29:57<17:54:25, 3.71it/s] 36%|███▌ | 132094/371472 [10:29:57<18:23:05, 3.62it/s] 36%|███▌ | 132095/371472 [10:29:57<17:47:50, 3.74it/s] 36%|███▌ | 132096/371472 [10:29:57<17:44:14, 3.75it/s] 36%|███▌ | 132097/371472 [10:29:58<17:42:54, 3.75it/s] 36%|███▌ | 132098/371472 [10:29:58<16:58:48, 3.92it/s] 36%|███▌ | 132099/371472 [10:29:58<17:01:17, 3.91it/s] 36%|███▌ | 132100/371472 [10:29:58<16:48:45, 3.95it/s] {'loss': 3.2564, 'learning_rate': 6.802613717166928e-07, 'epoch': 5.69} + 36%|███▌ | 132100/371472 [10:29:58<16:48:45, 3.95it/s] 36%|███▌ | 132101/371472 [10:29:59<16:45:20, 3.97it/s] 36%|███▌ | 132102/371472 [10:29:59<16:52:09, 3.94it/s] 36%|███▌ | 132103/371472 [10:29:59<17:46:29, 3.74it/s] 36%|███▌ | 132104/371472 [10:30:00<17:39:23, 3.77it/s] 36%|███▌ | 132105/371472 [10:30:00<19:58:00, 3.33it/s] 36%|███▌ | 132106/371472 [10:30:00<19:06:20, 3.48it/s] 36%|███▌ | 132107/371472 [10:30:00<18:47:59, 3.54it/s] 36%|███▌ | 132108/371472 [10:30:01<18:23:00, 3.62it/s] 36%|███▌ | 132109/371472 [10:30:01<17:56:56, 3.70it/s] 36%|███▌ | 132110/371472 [10:30:01<18:12:58, 3.65it/s] 36%|███▌ | 132111/371472 [10:30:02<18:26:35, 3.61it/s] 36%|███▌ | 132112/371472 [10:30:02<17:42:57, 3.75it/s] 36%|███▌ | 132113/371472 [10:30:02<17:53:55, 3.71it/s] 36%|███▌ | 132114/371472 [10:30:02<17:54:59, 3.71it/s] 36%|███▌ | 132115/371472 [10:30:03<21:26:48, 3.10it/s] 36%|███▌ | 132116/371472 [10:30:03<20:35:19, 3.23it/s] 36%|███▌ | 132117/371472 [10:30:03<19:36:41, 3.39it/s] 36%|███▌ | 132118/371472 [10:30:04<20:08:04, 3.30it/s] 36%|███▌ | 132119/371472 [10:30:04<19:23:30, 3.43it/s] 36%|███▌ | 132120/371472 [10:30:04<18:20:20, 3.63it/s] {'loss': 3.3028, 'learning_rate': 6.802128897412141e-07, 'epoch': 5.69} + 36%|███▌ | 132120/371472 [10:30:04<18:20:20, 3.63it/s] 36%|███▌ | 132121/371472 [10:30:04<18:09:33, 3.66it/s] 36%|███▌ | 132122/371472 [10:30:05<18:02:18, 3.69it/s] 36%|███▌ | 132123/371472 [10:30:05<18:01:54, 3.69it/s] 36%|███▌ | 132124/371472 [10:30:05<17:46:19, 3.74it/s] 36%|███▌ | 132125/371472 [10:30:05<17:46:40, 3.74it/s] 36%|███▌ | 132126/371472 [10:30:06<17:27:53, 3.81it/s] 36%|███▌ | 132127/371472 [10:30:06<21:41:45, 3.06it/s] 36%|███▌ | 132128/371472 [10:30:06<20:21:26, 3.27it/s] 36%|███▌ | 132129/371472 [10:30:07<20:50:21, 3.19it/s] 36%|███▌ | 132130/371472 [10:30:07<19:08:12, 3.47it/s] 36%|███▌ | 132131/371472 [10:30:07<18:08:27, 3.66it/s] 36%|███▌ | 132132/371472 [10:30:07<17:34:36, 3.78it/s] 36%|███▌ | 132133/371472 [10:30:08<17:40:19, 3.76it/s] 36%|███▌ | 132134/371472 [10:30:08<18:02:27, 3.69it/s] 36%|███▌ | 132135/371472 [10:30:08<19:47:31, 3.36it/s] 36%|███▌ | 132136/371472 [10:30:09<20:29:20, 3.24it/s] 36%|███▌ | 132137/371472 [10:30:09<20:08:09, 3.30it/s] 36%|███▌ | 132138/371472 [10:30:09<21:47:01, 3.05it/s] 36%|███▌ | 132139/371472 [10:30:10<20:52:08, 3.19it/s] 36%|███▌ | 132140/371472 [10:30:10<20:23:31, 3.26it/s] {'loss': 3.0871, 'learning_rate': 6.801644077657351e-07, 'epoch': 5.69} + 36%|███▌ | 132140/371472 [10:30:10<20:23:31, 3.26it/s] 36%|███▌ | 132141/371472 [10:30:10<19:39:30, 3.38it/s] 36%|███▌ | 132142/371472 [10:30:10<19:07:54, 3.47it/s] 36%|███▌ | 132143/371472 [10:30:11<18:28:50, 3.60it/s] 36%|███▌ | 132144/371472 [10:30:11<18:29:07, 3.60it/s] 36%|███▌ | 132145/371472 [10:30:11<17:54:47, 3.71it/s] 36%|███▌ | 132146/371472 [10:30:12<18:10:25, 3.66it/s] 36%|███▌ | 132147/371472 [10:30:12<18:19:54, 3.63it/s] 36%|███▌ | 132148/371472 [10:30:12<17:56:17, 3.71it/s] 36%|███▌ | 132149/371472 [10:30:12<17:47:27, 3.74it/s] 36%|███▌ | 132150/371472 [10:30:13<18:56:48, 3.51it/s] 36%|███▌ | 132151/371472 [10:30:13<19:44:10, 3.37it/s] 36%|███▌ | 132152/371472 [10:30:13<20:43:46, 3.21it/s] 36%|███▌ | 132153/371472 [10:30:14<19:54:34, 3.34it/s] 36%|███▌ | 132154/371472 [10:30:14<19:19:04, 3.44it/s] 36%|███▌ | 132155/371472 [10:30:14<19:23:54, 3.43it/s] 36%|███▌ | 132156/371472 [10:30:14<19:23:42, 3.43it/s] 36%|███▌ | 132157/371472 [10:30:15<18:48:34, 3.53it/s] 36%|███▌ | 132158/371472 [10:30:15<18:59:12, 3.50it/s] 36%|███▌ | 132159/371472 [10:30:15<19:38:50, 3.38it/s] 36%|███▌ | 132160/371472 [10:30:16<18:58:15, 3.50it/s] {'loss': 3.2126, 'learning_rate': 6.801159257902561e-07, 'epoch': 5.69} + 36%|███▌ | 132160/371472 [10:30:16<18:58:15, 3.50it/s] 36%|███▌ | 132161/371472 [10:30:16<18:25:27, 3.61it/s] 36%|███▌ | 132162/371472 [10:30:16<18:49:55, 3.53it/s] 36%|███▌ | 132163/371472 [10:30:16<19:29:22, 3.41it/s] 36%|███▌ | 132164/371472 [10:30:17<19:22:14, 3.43it/s] 36%|███▌ | 132165/371472 [10:30:17<18:50:32, 3.53it/s] 36%|███▌ | 132166/371472 [10:30:17<18:40:07, 3.56it/s] 36%|███▌ | 132167/371472 [10:30:18<18:24:33, 3.61it/s] 36%|███▌ | 132168/371472 [10:30:18<17:53:08, 3.72it/s] 36%|███▌ | 132169/371472 [10:30:18<19:12:40, 3.46it/s] 36%|███▌ | 132170/371472 [10:30:18<18:50:11, 3.53it/s] 36%|███▌ | 132171/371472 [10:30:19<19:01:45, 3.49it/s] 36%|███▌ | 132172/371472 [10:30:19<18:24:13, 3.61it/s] 36%|███▌ | 132173/371472 [10:30:19<20:19:42, 3.27it/s] 36%|███▌ | 132174/371472 [10:30:20<21:01:05, 3.16it/s] 36%|███▌ | 132175/371472 [10:30:20<19:45:12, 3.37it/s] 36%|███▌ | 132176/371472 [10:30:20<21:05:34, 3.15it/s] 36%|███▌ | 132177/371472 [10:30:21<21:39:13, 3.07it/s] 36%|███▌ | 132178/371472 [10:30:21<20:34:30, 3.23it/s] 36%|███▌ | 132179/371472 [10:30:21<20:57:42, 3.17it/s] 36%|███▌ | 132180/371472 [10:30:22<20:46:23, 3.20it/s] {'loss': 3.1723, 'learning_rate': 6.800674438147773e-07, 'epoch': 5.69} + 36%|███▌ | 132180/371472 [10:30:22<20:46:23, 3.20it/s] 36%|███▌ | 132181/371472 [10:30:22<19:33:02, 3.40it/s] 36%|███▌ | 132182/371472 [10:30:22<19:14:24, 3.45it/s] 36%|███▌ | 132183/371472 [10:30:22<18:52:49, 3.52it/s] 36%|███▌ | 132184/371472 [10:30:23<18:56:06, 3.51it/s] 36%|███▌ | 132185/371472 [10:30:23<19:54:35, 3.34it/s] 36%|███▌ | 132186/371472 [10:30:23<19:25:05, 3.42it/s] 36%|███▌ | 132187/371472 [10:30:24<18:33:39, 3.58it/s] 36%|███▌ | 132188/371472 [10:30:24<18:45:21, 3.54it/s] 36%|███▌ | 132189/371472 [10:30:24<18:24:30, 3.61it/s] 36%|███▌ | 132190/371472 [10:30:24<18:34:10, 3.58it/s] 36%|███▌ | 132191/371472 [10:30:25<18:06:01, 3.67it/s] 36%|███▌ | 132192/371472 [10:30:25<19:25:12, 3.42it/s] 36%|███▌ | 132193/371472 [10:30:25<18:35:37, 3.57it/s] 36%|███▌ | 132194/371472 [10:30:25<18:07:06, 3.67it/s] 36%|███▌ | 132195/371472 [10:30:26<18:38:41, 3.56it/s] 36%|███▌ | 132196/371472 [10:30:26<18:28:28, 3.60it/s] 36%|███▌ | 132197/371472 [10:30:26<19:30:15, 3.41it/s] 36%|███▌ | 132198/371472 [10:30:27<19:58:34, 3.33it/s] 36%|███▌ | 132199/371472 [10:30:27<19:02:47, 3.49it/s] 36%|███▌ | 132200/371472 [10:30:27<18:27:52, 3.60it/s] {'loss': 3.1666, 'learning_rate': 6.800189618392984e-07, 'epoch': 5.69} + 36%|███▌ | 132200/371472 [10:30:27<18:27:52, 3.60it/s] 36%|███▌ | 132201/371472 [10:30:27<18:50:06, 3.53it/s] 36%|███▌ | 132202/371472 [10:30:28<18:17:01, 3.64it/s] 36%|███▌ | 132203/371472 [10:30:28<18:27:39, 3.60it/s] 36%|███▌ | 132204/371472 [10:30:28<19:04:34, 3.48it/s] 36%|███▌ | 132205/371472 [10:30:29<18:49:10, 3.53it/s] 36%|███▌ | 132206/371472 [10:30:29<18:32:45, 3.58it/s] 36%|███▌ | 132207/371472 [10:30:29<18:32:39, 3.58it/s] 36%|███▌ | 132208/371472 [10:30:29<18:47:57, 3.54it/s] 36%|███▌ | 132209/371472 [10:30:30<17:57:59, 3.70it/s] 36%|███▌ | 132210/371472 [10:30:30<18:23:51, 3.61it/s] 36%|███▌ | 132211/371472 [10:30:30<18:38:24, 3.57it/s] 36%|███▌ | 132212/371472 [10:30:31<19:56:43, 3.33it/s] 36%|███▌ | 132213/371472 [10:30:31<18:54:13, 3.52it/s] 36%|███▌ | 132214/371472 [10:30:31<19:44:26, 3.37it/s] 36%|███▌ | 132215/371472 [10:30:31<19:16:41, 3.45it/s] 36%|███▌ | 132216/371472 [10:30:32<20:01:28, 3.32it/s] 36%|███▌ | 132217/371472 [10:30:32<19:23:44, 3.43it/s] 36%|███▌ | 132218/371472 [10:30:32<19:02:26, 3.49it/s] 36%|███▌ | 132219/371472 [10:30:33<18:42:18, 3.55it/s] 36%|███▌ | 132220/371472 [10:30:33<18:38:53, 3.56it/s] {'loss': 3.0644, 'learning_rate': 6.799704798638195e-07, 'epoch': 5.69} + 36%|███▌ | 132220/371472 [10:30:33<18:38:53, 3.56it/s] 36%|███▌ | 132221/371472 [10:30:33<18:25:26, 3.61it/s] 36%|███▌ | 132222/371472 [10:30:33<17:52:45, 3.72it/s] 36%|███▌ | 132223/371472 [10:30:34<17:53:27, 3.71it/s] 36%|███▌ | 132224/371472 [10:30:34<17:46:07, 3.74it/s] 36%|███▌ | 132225/371472 [10:30:34<18:46:06, 3.54it/s] 36%|███▌ | 132226/371472 [10:30:35<19:22:18, 3.43it/s] 36%|███▌ | 132227/371472 [10:30:35<18:59:59, 3.50it/s] 36%|███▌ | 132228/371472 [10:30:35<18:24:27, 3.61it/s] 36%|███▌ | 132229/371472 [10:30:35<18:27:20, 3.60it/s] 36%|███▌ | 132230/371472 [10:30:36<18:31:04, 3.59it/s] 36%|███▌ | 132231/371472 [10:30:36<18:34:55, 3.58it/s] 36%|███▌ | 132232/371472 [10:30:36<19:13:39, 3.46it/s] 36%|███▌ | 132233/371472 [10:30:37<18:52:13, 3.52it/s] 36%|███▌ | 132234/371472 [10:30:37<18:48:35, 3.53it/s] 36%|███▌ | 132235/371472 [10:30:37<20:06:15, 3.31it/s] 36%|███▌ | 132236/371472 [10:30:37<18:40:56, 3.56it/s] 36%|███▌ | 132237/371472 [10:30:38<18:53:59, 3.52it/s] 36%|███▌ | 132238/371472 [10:30:38<18:29:09, 3.59it/s] 36%|███▌ | 132239/371472 [10:30:38<18:25:35, 3.61it/s] 36%|███▌ | 132240/371472 [10:30:38<18:08:02, 3.66it/s] {'loss': 3.1257, 'learning_rate': 6.799219978883405e-07, 'epoch': 5.7} + 36%|███▌ | 132240/371472 [10:30:38<18:08:02, 3.66it/s] 36%|███▌ | 132241/371472 [10:30:39<17:55:24, 3.71it/s] 36%|███▌ | 132242/371472 [10:30:39<18:21:19, 3.62it/s] 36%|███▌ | 132243/371472 [10:30:39<20:23:35, 3.26it/s] 36%|███▌ | 132244/371472 [10:30:40<19:38:26, 3.38it/s] 36%|███▌ | 132245/371472 [10:30:40<19:14:40, 3.45it/s] 36%|███▌ | 132246/371472 [10:30:40<20:39:08, 3.22it/s] 36%|███▌ | 132247/371472 [10:30:41<20:10:37, 3.29it/s] 36%|███▌ | 132248/371472 [10:30:41<19:46:55, 3.36it/s] 36%|███▌ | 132249/371472 [10:30:41<19:04:43, 3.48it/s] 36%|███▌ | 132250/371472 [10:30:41<19:41:30, 3.37it/s] 36%|███▌ | 132251/371472 [10:30:42<18:52:40, 3.52it/s] 36%|███▌ | 132252/371472 [10:30:42<18:32:29, 3.58it/s] 36%|███▌ | 132253/371472 [10:30:42<18:35:23, 3.57it/s] 36%|███▌ | 132254/371472 [10:30:43<18:10:37, 3.66it/s] 36%|███▌ | 132255/371472 [10:30:43<18:30:34, 3.59it/s] 36%|███▌ | 132256/371472 [10:30:43<18:39:45, 3.56it/s] 36%|███▌ | 132257/371472 [10:30:43<18:46:27, 3.54it/s] 36%|███▌ | 132258/371472 [10:30:44<18:41:42, 3.55it/s] 36%|███▌ | 132259/371472 [10:30:44<18:30:47, 3.59it/s] 36%|███▌ | 132260/371472 [10:30:44<17:57:52, 3.70it/s] {'loss': 3.1779, 'learning_rate': 6.798735159128617e-07, 'epoch': 5.7} + 36%|███▌ | 132260/371472 [10:30:44<17:57:52, 3.70it/s] 36%|███▌ | 132261/371472 [10:30:44<17:32:30, 3.79it/s] 36%|███▌ | 132262/371472 [10:30:45<17:26:22, 3.81it/s] 36%|███▌ | 132263/371472 [10:30:45<17:18:35, 3.84it/s] 36%|███▌ | 132264/371472 [10:30:45<18:31:44, 3.59it/s] 36%|███▌ | 132265/371472 [10:30:46<17:55:04, 3.71it/s] 36%|███▌ | 132266/371472 [10:30:46<17:42:19, 3.75it/s] 36%|███▌ | 132267/371472 [10:30:46<17:25:48, 3.81it/s] 36%|███▌ | 132268/371472 [10:30:46<18:08:45, 3.66it/s] 36%|███▌ | 132269/371472 [10:30:47<17:46:33, 3.74it/s] 36%|███▌ | 132270/371472 [10:30:47<18:15:13, 3.64it/s] 36%|███▌ | 132271/371472 [10:30:47<18:20:25, 3.62it/s] 36%|███▌ | 132272/371472 [10:30:47<18:12:10, 3.65it/s] 36%|███▌ | 132273/371472 [10:30:48<18:49:52, 3.53it/s] 36%|███▌ | 132274/371472 [10:30:48<19:14:45, 3.45it/s] 36%|███▌ | 132275/371472 [10:30:48<19:01:48, 3.49it/s] 36%|███▌ | 132276/371472 [10:30:49<19:46:22, 3.36it/s] 36%|███▌ | 132277/371472 [10:30:49<19:22:56, 3.43it/s] 36%|███▌ | 132278/371472 [10:30:49<18:23:46, 3.61it/s] 36%|███▌ | 132279/371472 [10:30:49<17:29:53, 3.80it/s] 36%|███▌ | 132280/371472 [10:30:50<18:07:59, 3.66it/s] {'loss': 3.295, 'learning_rate': 6.798250339373829e-07, 'epoch': 5.7} + 36%|███▌ | 132280/371472 [10:30:50<18:07:59, 3.66it/s] 36%|███▌ | 132281/371472 [10:30:50<17:23:36, 3.82it/s] 36%|███▌ | 132282/371472 [10:30:50<16:53:49, 3.93it/s] 36%|███▌ | 132283/371472 [10:30:50<16:58:30, 3.91it/s] 36%|███▌ | 132284/371472 [10:30:51<17:07:13, 3.88it/s] 36%|███▌ | 132285/371472 [10:30:51<17:42:17, 3.75it/s] 36%|███▌ | 132286/371472 [10:30:51<18:31:50, 3.59it/s] 36%|███▌ | 132287/371472 [10:30:52<18:32:15, 3.58it/s] 36%|███▌ | 132288/371472 [10:30:52<18:11:59, 3.65it/s] 36%|███▌ | 132289/371472 [10:30:52<19:15:24, 3.45it/s] 36%|███▌ | 132290/371472 [10:30:52<19:26:27, 3.42it/s] 36%|███▌ | 132291/371472 [10:30:53<18:41:12, 3.56it/s] 36%|███▌ | 132292/371472 [10:30:53<18:36:55, 3.57it/s] 36%|███▌ | 132293/371472 [10:30:53<17:42:34, 3.75it/s] 36%|███▌ | 132294/371472 [10:30:53<17:40:01, 3.76it/s] 36%|███▌ | 132295/371472 [10:30:54<20:33:45, 3.23it/s] 36%|███▌ | 132296/371472 [10:30:54<21:05:48, 3.15it/s] 36%|███▌ | 132297/371472 [10:30:54<20:05:51, 3.31it/s] 36%|███▌ | 132298/371472 [10:30:55<19:06:33, 3.48it/s] 36%|███▌ | 132299/371472 [10:30:55<19:30:16, 3.41it/s] 36%|███▌ | 132300/371472 [10:30:55<18:43:18, 3.55it/s] {'loss': 3.2091, 'learning_rate': 6.79776551961904e-07, 'epoch': 5.7} + 36%|███▌ | 132300/371472 [10:30:55<18:43:18, 3.55it/s] 36%|███▌ | 132301/371472 [10:30:56<18:48:48, 3.53it/s] 36%|███▌ | 132302/371472 [10:30:56<18:08:44, 3.66it/s] 36%|███▌ | 132303/371472 [10:30:56<17:36:28, 3.77it/s] 36%|███▌ | 132304/371472 [10:30:56<17:39:27, 3.76it/s] 36%|███▌ | 132305/371472 [10:30:57<16:59:26, 3.91it/s] 36%|███▌ | 132306/371472 [10:30:57<17:16:51, 3.84it/s] 36%|███▌ | 132307/371472 [10:30:57<17:10:45, 3.87it/s] 36%|███▌ | 132308/371472 [10:30:57<17:35:31, 3.78it/s] 36%|███▌ | 132309/371472 [10:30:58<18:48:45, 3.53it/s] 36%|███▌ | 132310/371472 [10:30:58<18:37:00, 3.57it/s] 36%|███▌ | 132311/371472 [10:30:58<19:24:41, 3.42it/s] 36%|███▌ | 132312/371472 [10:30:59<18:47:41, 3.53it/s] 36%|███▌ | 132313/371472 [10:30:59<18:57:17, 3.50it/s] 36%|███▌ | 132314/371472 [10:30:59<18:17:39, 3.63it/s] 36%|███▌ | 132315/371472 [10:30:59<17:59:22, 3.69it/s] 36%|███▌ | 132316/371472 [10:31:00<20:09:17, 3.30it/s] 36%|███▌ | 132317/371472 [10:31:00<19:08:54, 3.47it/s] 36%|███▌ | 132318/371472 [10:31:00<18:58:04, 3.50it/s] 36%|███▌ | 132319/371472 [10:31:01<18:11:25, 3.65it/s] 36%|███▌ | 132320/371472 [10:31:01<19:17:15, 3.44it/s] {'loss': 3.1656, 'learning_rate': 6.79728069986425e-07, 'epoch': 5.7} + 36%|███▌ | 132320/371472 [10:31:01<19:17:15, 3.44it/s] 36%|███▌ | 132321/371472 [10:31:01<18:39:59, 3.56it/s] 36%|███▌ | 132322/371472 [10:31:01<18:33:41, 3.58it/s] 36%|███▌ | 132323/371472 [10:31:02<18:33:36, 3.58it/s] 36%|███▌ | 132324/371472 [10:31:02<18:06:33, 3.67it/s] 36%|███▌ | 132325/371472 [10:31:02<19:00:01, 3.50it/s] 36%|███▌ | 132326/371472 [10:31:03<19:07:48, 3.47it/s] 36%|███▌ | 132327/371472 [10:31:03<18:34:15, 3.58it/s] 36%|███▌ | 132328/371472 [10:31:03<18:21:36, 3.62it/s] 36%|███▌ | 132329/371472 [10:31:03<18:05:04, 3.67it/s] 36%|███▌ | 132330/371472 [10:31:04<17:46:38, 3.74it/s] 36%|███▌ | 132331/371472 [10:31:04<18:12:21, 3.65it/s] 36%|███▌ | 132332/371472 [10:31:04<17:23:51, 3.82it/s] 36%|███▌ | 132333/371472 [10:31:04<17:28:15, 3.80it/s] 36%|███▌ | 132334/371472 [10:31:05<17:43:39, 3.75it/s] 36%|███▌ | 132335/371472 [10:31:05<17:51:58, 3.72it/s] 36%|███▌ | 132336/371472 [10:31:05<17:50:16, 3.72it/s] 36%|███▌ | 132337/371472 [10:31:05<17:59:46, 3.69it/s] 36%|███▌ | 132338/371472 [10:31:06<17:30:21, 3.79it/s] 36%|███▌ | 132339/371472 [10:31:06<17:37:25, 3.77it/s] 36%|███▌ | 132340/371472 [10:31:06<17:30:08, 3.80it/s] {'loss': 3.2217, 'learning_rate': 6.796795880109461e-07, 'epoch': 5.7} + 36%|███▌ | 132340/371472 [10:31:06<17:30:08, 3.80it/s] 36%|███▌ | 132341/371472 [10:31:06<17:11:20, 3.86it/s] 36%|███▌ | 132342/371472 [10:31:07<17:58:01, 3.70it/s] 36%|███▌ | 132343/371472 [10:31:07<17:32:41, 3.79it/s] 36%|███▌ | 132344/371472 [10:31:07<17:43:05, 3.75it/s] 36%|███▌ | 132345/371472 [10:31:08<19:00:46, 3.49it/s] 36%|███▌ | 132346/371472 [10:31:08<18:59:40, 3.50it/s] 36%|███▌ | 132347/371472 [10:31:08<18:33:58, 3.58it/s] 36%|███▌ | 132348/371472 [10:31:08<17:56:28, 3.70it/s] 36%|███▌ | 132349/371472 [10:31:09<17:23:53, 3.82it/s] 36%|███▌ | 132350/371472 [10:31:09<17:41:46, 3.75it/s] 36%|███▌ | 132351/371472 [10:31:09<17:50:28, 3.72it/s] 36%|███▌ | 132352/371472 [10:31:10<17:57:20, 3.70it/s] 36%|███▌ | 132353/371472 [10:31:10<18:14:12, 3.64it/s] 36%|███▌ | 132354/371472 [10:31:10<18:46:26, 3.54it/s] 36%|███▌ | 132355/371472 [10:31:10<18:56:44, 3.51it/s] 36%|███▌ | 132356/371472 [10:31:11<17:46:11, 3.74it/s] 36%|███▌ | 132357/371472 [10:31:11<19:24:39, 3.42it/s] 36%|███▌ | 132358/371472 [10:31:11<19:48:56, 3.35it/s] 36%|███▌ | 132359/371472 [10:31:12<18:56:20, 3.51it/s] 36%|███▌ | 132360/371472 [10:31:12<18:35:23, 3.57it/s] {'loss': 3.0465, 'learning_rate': 6.796311060354673e-07, 'epoch': 5.7} + 36%|███▌ | 132360/371472 [10:31:12<18:35:23, 3.57it/s] 36%|███▌ | 132361/371472 [10:31:12<18:39:59, 3.56it/s] 36%|███▌ | 132362/371472 [10:31:12<18:37:10, 3.57it/s] 36%|███▌ | 132363/371472 [10:31:13<18:12:42, 3.65it/s] 36%|███▌ | 132364/371472 [10:31:13<17:46:48, 3.74it/s] 36%|███▌ | 132365/371472 [10:31:13<18:57:57, 3.50it/s] 36%|███▌ | 132366/371472 [10:31:13<18:16:48, 3.63it/s] 36%|███▌ | 132367/371472 [10:31:14<19:05:05, 3.48it/s] 36%|███▌ | 132368/371472 [10:31:14<18:03:10, 3.68it/s] 36%|███▌ | 132369/371472 [10:31:14<17:28:58, 3.80it/s] 36%|███▌ | 132370/371472 [10:31:14<17:02:10, 3.90it/s] 36%|███▌ | 132371/371472 [10:31:15<18:19:07, 3.63it/s] 36%|███▌ | 132372/371472 [10:31:15<18:32:19, 3.58it/s] 36%|███▌ | 132373/371472 [10:31:15<18:21:21, 3.62it/s] 36%|███▌ | 132374/371472 [10:31:16<18:33:34, 3.58it/s] 36%|███▌ | 132375/371472 [10:31:16<18:20:29, 3.62it/s] 36%|███▌ | 132376/371472 [10:31:16<18:09:41, 3.66it/s] 36%|███▌ | 132377/371472 [10:31:16<18:28:08, 3.60it/s] 36%|███▌ | 132378/371472 [10:31:17<17:27:38, 3.80it/s] 36%|███▌ | 132379/371472 [10:31:17<19:14:59, 3.45it/s] 36%|███▌ | 132380/371472 [10:31:17<18:31:33, 3.58it/s] {'loss': 3.0063, 'learning_rate': 6.795826240599883e-07, 'epoch': 5.7} + 36%|███▌ | 132380/371472 [10:31:17<18:31:33, 3.58it/s] 36%|███▌ | 132381/371472 [10:31:18<18:13:51, 3.64it/s] 36%|███▌ | 132382/371472 [10:31:18<19:28:43, 3.41it/s] 36%|███▌ | 132383/371472 [10:31:18<19:05:35, 3.48it/s] 36%|███▌ | 132384/371472 [10:31:19<20:27:53, 3.25it/s] 36%|███▌ | 132385/371472 [10:31:19<19:25:59, 3.42it/s] 36%|███▌ | 132386/371472 [10:31:19<19:00:54, 3.49it/s] 36%|███▌ | 132387/371472 [10:31:19<21:13:28, 3.13it/s] 36%|███▌ | 132388/371472 [10:31:20<20:27:32, 3.25it/s] 36%|███▌ | 132389/371472 [10:31:20<19:50:16, 3.35it/s] 36%|███▌ | 132390/371472 [10:31:20<20:22:55, 3.26it/s] 36%|███▌ | 132391/371472 [10:31:21<21:02:40, 3.16it/s] 36%|███▌ | 132392/371472 [10:31:21<21:28:17, 3.09it/s] 36%|███▌ | 132393/371472 [10:31:21<20:12:34, 3.29it/s] 36%|███▌ | 132394/371472 [10:31:22<19:41:40, 3.37it/s] 36%|███▌ | 132395/371472 [10:31:22<20:13:03, 3.28it/s] 36%|███▌ | 132396/371472 [10:31:22<19:29:41, 3.41it/s] 36%|███▌ | 132397/371472 [10:31:22<20:16:31, 3.28it/s] 36%|███▌ | 132398/371472 [10:31:23<20:44:21, 3.20it/s] 36%|███▌ | 132399/371472 [10:31:23<19:56:38, 3.33it/s] 36%|███▌ | 132400/371472 [10:31:23<19:24:21, 3.42it/s] {'loss': 3.0732, 'learning_rate': 6.795341420845094e-07, 'epoch': 5.7} + 36%|███▌ | 132400/371472 [10:31:23<19:24:21, 3.42it/s] 36%|███▌ | 132401/371472 [10:31:24<18:17:00, 3.63it/s] 36%|███▌ | 132402/371472 [10:31:24<18:20:29, 3.62it/s] 36%|███▌ | 132403/371472 [10:31:24<17:46:03, 3.74it/s] 36%|███▌ | 132404/371472 [10:31:24<18:12:00, 3.65it/s] 36%|███▌ | 132405/371472 [10:31:25<18:02:35, 3.68it/s] 36%|███▌ | 132406/371472 [10:31:25<18:21:54, 3.62it/s] 36%|███▌ | 132407/371472 [10:31:25<20:35:36, 3.22it/s] 36%|███▌ | 132408/371472 [10:31:26<20:07:35, 3.30it/s] 36%|███▌ | 132409/371472 [10:31:26<20:01:52, 3.32it/s] 36%|██���▌ | 132410/371472 [10:31:26<19:26:37, 3.42it/s] 36%|███▌ | 132411/371472 [10:31:27<19:21:45, 3.43it/s] 36%|███▌ | 132412/371472 [10:31:27<18:44:23, 3.54it/s] 36%|███▌ | 132413/371472 [10:31:27<18:34:10, 3.58it/s] 36%|███▌ | 132414/371472 [10:31:27<18:16:39, 3.63it/s] 36%|███▌ | 132415/371472 [10:31:28<17:21:41, 3.82it/s] 36%|███▌ | 132416/371472 [10:31:28<17:50:50, 3.72it/s] 36%|███▌ | 132417/371472 [10:31:28<17:38:08, 3.77it/s] 36%|███▌ | 132418/371472 [10:31:28<18:21:44, 3.62it/s] 36%|███▌ | 132419/371472 [10:31:29<18:30:21, 3.59it/s] 36%|███▌ | 132420/371472 [10:31:29<17:56:49, 3.70it/s] {'loss': 3.0586, 'learning_rate': 6.794856601090306e-07, 'epoch': 5.7} + 36%|███▌ | 132420/371472 [10:31:29<17:56:49, 3.70it/s] 36%|███▌ | 132421/371472 [10:31:29<19:31:37, 3.40it/s] 36%|███▌ | 132422/371472 [10:31:30<20:17:56, 3.27it/s] 36%|███▌ | 132423/371472 [10:31:30<20:17:56, 3.27it/s] 36%|███▌ | 132424/371472 [10:31:30<19:52:25, 3.34it/s] 36%|███▌ | 132425/371472 [10:31:30<19:42:26, 3.37it/s] 36%|███▌ | 132426/371472 [10:31:31<19:18:47, 3.44it/s] 36%|███▌ | 132427/371472 [10:31:31<18:47:33, 3.53it/s] 36%|███▌ | 132428/371472 [10:31:31<18:05:03, 3.67it/s] 36%|███▌ | 132429/371472 [10:31:32<17:37:55, 3.77it/s] 36%|███▌ | 132430/371472 [10:31:32<17:33:49, 3.78it/s] 36%|███▌ | 132431/371472 [10:31:32<18:10:16, 3.65it/s] 36%|███▌ | 132432/371472 [10:31:32<17:49:08, 3.73it/s] 36%|███▌ | 132433/371472 [10:31:33<18:35:49, 3.57it/s] 36%|███▌ | 132434/371472 [10:31:33<18:18:25, 3.63it/s] 36%|███▌ | 132435/371472 [10:31:33<17:30:30, 3.79it/s] 36%|███▌ | 132436/371472 [10:31:33<17:08:02, 3.88it/s] 36%|███▌ | 132437/371472 [10:31:34<16:53:26, 3.93it/s] 36%|███▌ | 132438/371472 [10:31:34<16:39:46, 3.98it/s] 36%|███▌ | 132439/371472 [10:31:34<16:55:16, 3.92it/s] 36%|███▌ | 132440/371472 [10:31:34<17:18:03, 3.84it/s] {'loss': 3.337, 'learning_rate': 6.794371781335518e-07, 'epoch': 5.7} + 36%|███▌ | 132440/371472 [10:31:34<17:18:03, 3.84it/s] 36%|███▌ | 132441/371472 [10:31:35<18:00:42, 3.69it/s] 36%|███▌ | 132442/371472 [10:31:35<18:10:08, 3.65it/s] 36%|███▌ | 132443/371472 [10:31:35<18:07:20, 3.66it/s] 36%|███▌ | 132444/371472 [10:31:35<17:13:34, 3.85it/s] 36%|███▌ | 132445/371472 [10:31:36<17:24:56, 3.81it/s] 36%|███▌ | 132446/371472 [10:31:36<17:25:42, 3.81it/s] 36%|███▌ | 132447/371472 [10:31:36<18:34:50, 3.57it/s] 36%|███▌ | 132448/371472 [10:31:37<19:03:10, 3.48it/s] 36%|███▌ | 132449/371472 [10:31:37<19:09:25, 3.47it/s] 36%|███▌ | 132450/371472 [10:31:37<21:08:30, 3.14it/s] 36%|███▌ | 132451/371472 [10:31:38<21:05:00, 3.15it/s] 36%|███▌ | 132452/371472 [10:31:38<20:56:33, 3.17it/s] 36%|███▌ | 132453/371472 [10:31:38<19:40:44, 3.37it/s] 36%|███▌ | 132454/371472 [10:31:38<19:16:35, 3.44it/s] 36%|███▌ | 132455/371472 [10:31:39<19:09:54, 3.46it/s] 36%|███▌ | 132456/371472 [10:31:39<19:49:20, 3.35it/s] 36%|███▌ | 132457/371472 [10:31:39<20:44:39, 3.20it/s] 36%|███▌ | 132458/371472 [10:31:40<22:11:37, 2.99it/s] 36%|███▌ | 132459/371472 [10:31:40<20:32:46, 3.23it/s] 36%|███▌ | 132460/371472 [10:31:40<20:45:04, 3.20it/s] {'loss': 3.1075, 'learning_rate': 6.793886961580726e-07, 'epoch': 5.71} + 36%|███▌ | 132460/371472 [10:31:40<20:45:04, 3.20it/s] 36%|███▌ | 132461/371472 [10:31:41<20:01:39, 3.32it/s] 36%|███▌ | 132462/371472 [10:31:41<18:57:12, 3.50it/s] 36%|███▌ | 132463/371472 [10:31:41<19:00:07, 3.49it/s] 36%|███▌ | 132464/371472 [10:31:41<18:39:10, 3.56it/s] 36%|███▌ | 132465/371472 [10:31:42<19:12:09, 3.46it/s] 36%|███▌ | 132466/371472 [10:31:42<18:19:12, 3.62it/s] 36%|███▌ | 132467/371472 [10:31:42<19:33:13, 3.40it/s] 36%|███▌ | 132468/371472 [10:31:43<18:34:28, 3.57it/s] 36%|███▌ | 132469/371472 [10:31:43<23:17:12, 2.85it/s] 36%|███▌ | 132470/371472 [10:31:43<22:08:43, 3.00it/s] 36%|███▌ | 132471/371472 [10:31:44<21:05:21, 3.15it/s] 36%|███▌ | 132472/371472 [10:31:44<20:18:18, 3.27it/s] 36%|███▌ | 132473/371472 [10:31:44<19:36:57, 3.38it/s] 36%|███▌ | 132474/371472 [10:31:45<20:53:04, 3.18it/s] 36%|███▌ | 132475/371472 [10:31:45<19:50:16, 3.35it/s] 36%|███▌ | 132476/371472 [10:31:45<21:42:22, 3.06it/s] 36%|███▌ | 132477/371472 [10:31:45<20:06:31, 3.30it/s] 36%|███▌ | 132478/371472 [10:31:46<19:43:31, 3.37it/s] 36%|███▌ | 132479/371472 [10:31:46<18:38:25, 3.56it/s] 36%|███▌ | 132480/371472 [10:31:46<19:31:08, 3.40it/s] {'loss': 3.1479, 'learning_rate': 6.793402141825938e-07, 'epoch': 5.71} + 36%|███▌ | 132480/371472 [10:31:46<19:31:08, 3.40it/s] 36%|███▌ | 132481/371472 [10:31:47<19:15:17, 3.45it/s] 36%|███▌ | 132482/371472 [10:31:47<19:08:20, 3.47it/s] 36%|███▌ | 132483/371472 [10:31:47<20:59:00, 3.16it/s] 36%|███▌ | 132484/371472 [10:31:48<20:55:19, 3.17it/s] 36%|███▌ | 132485/371472 [10:31:48<19:33:11, 3.40it/s] 36%|███▌ | 132486/371472 [10:31:48<18:35:48, 3.57it/s] 36%|███▌ | 132487/371472 [10:31:48<19:27:06, 3.41it/s] 36%|███▌ | 132488/371472 [10:31:49<18:28:37, 3.59it/s] 36%|███▌ | 132489/371472 [10:31:49<18:18:51, 3.62it/s] 36%|███▌ | 132490/371472 [10:31:49<17:36:28, 3.77it/s] 36%|███▌ | 132491/371472 [10:31:49<18:02:13, 3.68it/s] 36%|███▌ | 132492/371472 [10:31:50<19:59:51, 3.32it/s] 36%|███▌ | 132493/371472 [10:31:50<20:42:52, 3.20it/s] 36%|███▌ | 132494/371472 [10:31:51<22:07:28, 3.00it/s] 36%|███▌ | 132495/371472 [10:31:51<20:36:55, 3.22it/s] 36%|███▌ | 132496/371472 [10:31:51<20:21:40, 3.26it/s] 36%|███▌ | 132497/371472 [10:31:51<18:50:06, 3.52it/s] 36%|███▌ | 132498/371472 [10:31:52<19:36:05, 3.39it/s] 36%|███▌ | 132499/371472 [10:31:52<19:20:47, 3.43it/s] 36%|███▌ | 132500/371472 [10:31:52<20:40:38, 3.21it/s] {'loss': 3.068, 'learning_rate': 6.79291732207115e-07, 'epoch': 5.71} + 36%|███▌ | 132500/371472 [10:31:52<20:40:38, 3.21it/s] 36%|███▌ | 132501/371472 [10:31:53<19:42:28, 3.37it/s] 36%|███▌ | 132502/371472 [10:31:53<18:49:07, 3.53it/s] 36%|███▌ | 132503/371472 [10:31:53<18:40:10, 3.56it/s] 36%|███▌ | 132504/371472 [10:31:53<18:35:59, 3.57it/s] 36%|███▌ | 132505/371472 [10:31:54<17:57:19, 3.70it/s] 36%|███▌ | 132506/371472 [10:31:54<18:24:09, 3.61it/s] 36%|███▌ | 132507/371472 [10:31:54<18:08:46, 3.66it/s] 36%|███▌ | 132508/371472 [10:31:54<18:07:50, 3.66it/s] 36%|███▌ | 132509/371472 [10:31:55<19:58:42, 3.32it/s] 36%|███▌ | 132510/371472 [10:31:55<19:54:55, 3.33it/s] 36%|███▌ | 132511/371472 [10:31:55<18:40:14, 3.56it/s] 36%|███▌ | 132512/371472 [10:31:56<18:12:52, 3.64it/s] 36%|███▌ | 132513/371472 [10:31:56<18:20:16, 3.62it/s] 36%|███▌ | 132514/371472 [10:31:56<17:29:34, 3.79it/s] 36%|███▌ | 132515/371472 [10:31:57<20:43:32, 3.20it/s] 36%|███▌ | 132516/371472 [10:31:57<19:36:52, 3.38it/s] 36%|███▌ | 132517/371472 [10:31:57<18:53:15, 3.51it/s] 36%|███▌ | 132518/371472 [10:31:57<18:47:24, 3.53it/s] 36%|███▌ | 132519/371472 [10:31:58<18:47:58, 3.53it/s] 36%|███▌ | 132520/371472 [10:31:58<19:03:39, 3.48it/s] {'loss': 3.2656, 'learning_rate': 6.792432502316361e-07, 'epoch': 5.71} + 36%|███▌ | 132520/371472 [10:31:58<19:03:39, 3.48it/s] 36%|███▌ | 132521/371472 [10:31:58<18:41:35, 3.55it/s] 36%|███▌ | 132522/371472 [10:31:58<18:58:53, 3.50it/s] 36%|███▌ | 132523/371472 [10:31:59<18:32:26, 3.58it/s] 36%|███▌ | 132524/371472 [10:31:59<17:42:30, 3.75it/s] 36%|███▌ | 132525/371472 [10:31:59<17:46:55, 3.73it/s] 36%|███▌ | 132526/371472 [10:32:00<17:38:28, 3.76it/s] 36%|███▌ | 132527/371472 [10:32:00<18:56:13, 3.50it/s] 36%|███▌ | 132528/371472 [10:32:00<18:24:34, 3.61it/s] 36%|███▌ | 132529/371472 [10:32:00<18:00:10, 3.69it/s] 36%|███▌ | 132530/371472 [10:32:01<17:42:46, 3.75it/s] 36%|███▌ | 132531/371472 [10:32:01<17:22:32, 3.82it/s] 36%|███▌ | 132532/371472 [10:32:01<20:04:55, 3.31it/s] 36%|███▌ | 132533/371472 [10:32:02<19:33:31, 3.39it/s] 36%|███▌ | 132534/371472 [10:32:02<19:08:20, 3.47it/s] 36%|███▌ | 132535/371472 [10:32:02<18:50:51, 3.52it/s] 36%|███▌ | 132536/371472 [10:32:02<18:43:03, 3.55it/s] 36%|███▌ | 132537/371472 [10:32:03<18:14:21, 3.64it/s] 36%|███▌ | 132538/371472 [10:32:03<19:01:24, 3.49it/s] 36%|███▌ | 132539/371472 [10:32:03<19:49:43, 3.35it/s] 36%|███▌ | 132540/371472 [10:32:04<18:47:36, 3.53it/s] {'loss': 3.077, 'learning_rate': 6.791947682561571e-07, 'epoch': 5.71} + 36%|███▌ | 132540/371472 [10:32:04<18:47:36, 3.53it/s] 36%|███▌ | 132541/371472 [10:32:04<18:27:29, 3.60it/s] 36%|███▌ | 132542/371472 [10:32:04<18:31:23, 3.58it/s] 36%|███▌ | 132543/371472 [10:32:04<18:15:09, 3.64it/s] 36%|███▌ | 132544/371472 [10:32:05<19:42:19, 3.37it/s] 36%|███▌ | 132545/371472 [10:32:05<19:16:57, 3.44it/s] 36%|███▌ | 132546/371472 [10:32:05<19:17:28, 3.44it/s] 36%|███▌ | 132547/371472 [10:32:06<19:28:55, 3.41it/s] 36%|███▌ | 132548/371472 [10:32:06<19:00:28, 3.49it/s] 36%|███▌ | 132549/371472 [10:32:06<18:53:28, 3.51it/s] 36%|███▌ | 132550/371472 [10:32:06<18:34:58, 3.57it/s] 36%|███▌ | 132551/371472 [10:32:07<18:56:11, 3.50it/s] 36%|███▌ | 132552/371472 [10:32:07<18:18:37, 3.62it/s] 36%|███▌ | 132553/371472 [10:32:07<17:34:16, 3.78it/s] 36%|███▌ | 132554/371472 [10:32:07<17:21:53, 3.82it/s] 36%|███▌ | 132555/371472 [10:32:08<16:55:06, 3.92it/s] 36%|███▌ | 132556/371472 [10:32:08<18:11:40, 3.65it/s] 36%|███▌ | 132557/371472 [10:32:08<17:50:35, 3.72it/s] 36%|███▌ | 132558/371472 [10:32:08<17:47:23, 3.73it/s] 36%|███▌ | 132559/371472 [10:32:09<17:17:22, 3.84it/s] 36%|███▌ | 132560/371472 [10:32:09<17:15:41, 3.84it/s] {'loss': 3.1316, 'learning_rate': 6.791462862806783e-07, 'epoch': 5.71} + 36%|███▌ | 132560/371472 [10:32:09<17:15:41, 3.84it/s] 36%|███▌ | 132561/371472 [10:32:09<17:24:55, 3.81it/s] 36%|███▌ | 132562/371472 [10:32:10<17:38:40, 3.76it/s] 36%|███▌ | 132563/371472 [10:32:10<17:51:42, 3.72it/s] 36%|███▌ | 132564/371472 [10:32:10<18:17:59, 3.63it/s] 36%|███▌ | 132565/371472 [10:32:10<18:38:59, 3.56it/s] 36%|███▌ | 132566/371472 [10:32:11<18:03:56, 3.67it/s] 36%|███▌ | 132567/371472 [10:32:11<17:27:38, 3.80it/s] 36%|███▌ | 132568/371472 [10:32:11<17:30:18, 3.79it/s] 36%|███▌ | 132569/371472 [10:32:11<17:50:21, 3.72it/s] 36%|███▌ | 132570/371472 [10:32:12<17:31:53, 3.79it/s] 36%|███▌ | 132571/371472 [10:32:12<17:30:20, 3.79it/s] 36%|███▌ | 132572/371472 [10:32:12<17:11:47, 3.86it/s] 36%|███▌ | 132573/371472 [10:32:13<18:28:00, 3.59it/s] 36%|███▌ | 132574/371472 [10:32:13<17:34:01, 3.78it/s] 36%|███▌ | 132575/371472 [10:32:13<17:35:37, 3.77it/s] 36%|███▌ | 132576/371472 [10:32:13<17:29:51, 3.79it/s] 36%|███▌ | 132577/371472 [10:32:14<17:48:07, 3.73it/s] 36%|███▌ | 132578/371472 [10:32:14<17:23:50, 3.81it/s] 36%|███▌ | 132579/371472 [10:32:14<17:32:45, 3.78it/s] 36%|███▌ | 132580/371472 [10:32:14<18:35:47, 3.57it/s] {'loss': 3.1422, 'learning_rate': 6.790978043051994e-07, 'epoch': 5.71} + 36%|███▌ | 132580/371472 [10:32:14<18:35:47, 3.57it/s] 36%|███▌ | 132581/371472 [10:32:15<17:57:33, 3.69it/s] 36%|███▌ | 132582/371472 [10:32:15<18:06:46, 3.66it/s] 36%|███▌ | 132583/371472 [10:32:15<18:34:38, 3.57it/s] 36%|███▌ | 132584/371472 [10:32:15<18:32:46, 3.58it/s] 36%|███▌ | 132585/371472 [10:32:16<19:03:20, 3.48it/s] 36%|███▌ | 132586/371472 [10:32:16<18:15:33, 3.63it/s] 36%|███▌ | 132587/371472 [10:32:16<17:53:12, 3.71it/s] 36%|███▌ | 132588/371472 [10:32:17<18:08:19, 3.66it/s] 36%|███▌ | 132589/371472 [10:32:17<19:10:36, 3.46it/s] 36%|███▌ | 132590/371472 [10:32:17<18:36:32, 3.57it/s] 36%|███▌ | 132591/371472 [10:32:17<18:59:52, 3.49it/s] 36%|███▌ | 132592/371472 [10:32:18<18:36:42, 3.57it/s] 36%|███▌ | 132593/371472 [10:32:18<17:53:04, 3.71it/s] 36%|███▌ | 132594/371472 [10:32:18<18:17:02, 3.63it/s] 36%|███▌ | 132595/371472 [10:32:19<19:09:37, 3.46it/s] 36%|███▌ | 132596/371472 [10:32:19<19:51:38, 3.34it/s] 36%|███▌ | 132597/371472 [10:32:19<19:37:05, 3.38it/s] 36%|███▌ | 132598/371472 [10:32:19<18:31:10, 3.58it/s] 36%|███▌ | 132599/371472 [10:32:20<18:10:26, 3.65it/s] 36%|███▌ | 132600/371472 [10:32:20<18:14:35, 3.64it/s] {'loss': 3.1795, 'learning_rate': 6.790493223297205e-07, 'epoch': 5.71} + 36%|███▌ | 132600/371472 [10:32:20<18:14:35, 3.64it/s] 36%|███▌ | 132601/371472 [10:32:20<17:39:31, 3.76it/s] 36%|███▌ | 132602/371472 [10:32:20<17:34:47, 3.77it/s] 36%|███▌ | 132603/371472 [10:32:21<17:10:09, 3.86it/s] 36%|███▌ | 132604/371472 [10:32:21<17:03:42, 3.89it/s] 36%|███▌ | 132605/371472 [10:32:21<17:00:14, 3.90it/s] 36%|███▌ | 132606/371472 [10:32:21<16:36:41, 3.99it/s] 36%|███▌ | 132607/371472 [10:32:22<18:23:22, 3.61it/s] 36%|███▌ | 132608/371472 [10:32:22<17:49:10, 3.72it/s] 36%|███▌ | 132609/371472 [10:32:22<17:41:48, 3.75it/s] 36%|███▌ | 132610/371472 [10:32:23<19:35:47, 3.39it/s] 36%|███▌ | 132611/371472 [10:32:23<18:42:37, 3.55it/s] 36%|███▌ | 132612/371472 [10:32:23<17:48:54, 3.72it/s] 36%|███▌ | 132613/371472 [10:32:23<17:19:51, 3.83it/s] 36%|███▌ | 132614/371472 [10:32:24<18:08:20, 3.66it/s] 36%|███▌ | 132615/371472 [10:32:24<18:02:18, 3.68it/s] 36%|███▌ | 132616/371472 [10:32:24<17:37:02, 3.77it/s] 36%|███▌ | 132617/371472 [10:32:24<17:12:28, 3.86it/s] 36%|███▌ | 132618/371472 [10:32:25<16:50:15, 3.94it/s] 36%|███▌ | 132619/371472 [10:32:25<17:01:55, 3.90it/s] 36%|███▌ | 132620/371472 [10:32:25<17:36:35, 3.77it/s] {'loss': 3.2757, 'learning_rate': 6.790008403542415e-07, 'epoch': 5.71} + 36%|███▌ | 132620/371472 [10:32:25<17:36:35, 3.77it/s] 36%|███▌ | 132621/371472 [10:32:26<16:54:51, 3.92it/s] 36%|███▌ | 132622/371472 [10:32:26<16:58:23, 3.91it/s] 36%|███▌ | 132623/371472 [10:32:26<17:13:36, 3.85it/s] 36%|███▌ | 132624/371472 [10:32:26<17:32:37, 3.78it/s] 36%|███▌ | 132625/371472 [10:32:27<18:12:29, 3.64it/s] 36%|███▌ | 132626/371472 [10:32:27<17:16:42, 3.84it/s] 36%|███▌ | 132627/371472 [10:32:27<17:30:03, 3.79it/s] 36%|███▌ | 132628/371472 [10:32:27<17:26:07, 3.81it/s] 36%|███▌ | 132629/371472 [10:32:28<17:35:11, 3.77it/s] 36%|███▌ | 132630/371472 [10:32:28<17:30:05, 3.79it/s] 36%|███▌ | 132631/371472 [10:32:28<18:00:34, 3.68it/s] 36%|███▌ | 132632/371472 [10:32:28<18:00:30, 3.68it/s] 36%|███▌ | 132633/371472 [10:32:29<18:35:49, 3.57it/s] 36%|███▌ | 132634/371472 [10:32:29<18:22:09, 3.61it/s] 36%|███▌ | 132635/371472 [10:32:29<17:33:01, 3.78it/s] 36%|███▌ | 132636/371472 [10:32:30<17:21:20, 3.82it/s] 36%|███▌ | 132637/371472 [10:32:30<18:25:18, 3.60it/s] 36%|███▌ | 132638/371472 [10:32:30<18:28:16, 3.59it/s] 36%|███▌ | 132639/371472 [10:32:30<18:26:02, 3.60it/s] 36%|███▌ | 132640/371472 [10:32:31<17:41:00, 3.75it/s] {'loss': 3.2422, 'learning_rate': 6.789523583787627e-07, 'epoch': 5.71} + 36%|███▌ | 132640/371472 [10:32:31<17:41:00, 3.75it/s] 36%|███▌ | 132641/371472 [10:32:31<19:02:31, 3.48it/s] 36%|███▌ | 132642/371472 [10:32:31<20:20:21, 3.26it/s] 36%|███▌ | 132643/371472 [10:32:32<19:26:37, 3.41it/s] 36%|███▌ | 132644/371472 [10:32:32<19:08:16, 3.47it/s] 36%|███▌ | 132645/371472 [10:32:32<18:40:30, 3.55it/s] 36%|███▌ | 132646/371472 [10:32:32<18:02:11, 3.68it/s] 36%|███▌ | 132647/371472 [10:32:33<17:34:04, 3.78it/s] 36%|███▌ | 132648/371472 [10:32:33<17:21:48, 3.82it/s] 36%|███▌ | 132649/371472 [10:32:33<18:21:29, 3.61it/s] 36%|███▌ | 132650/371472 [10:32:33<18:31:19, 3.58it/s] 36%|███▌ | 132651/371472 [10:32:34<19:08:28, 3.47it/s] 36%|███▌ | 132652/371472 [10:32:34<19:10:03, 3.46it/s] 36%|███▌ | 132653/371472 [10:32:34<19:23:17, 3.42it/s] 36%|███▌ | 132654/371472 [10:32:35<20:44:03, 3.20it/s] 36%|███▌ | 132655/371472 [10:32:35<20:36:12, 3.22it/s] 36%|███▌ | 132656/371472 [10:32:35<19:57:33, 3.32it/s] 36%|███▌ | 132657/371472 [10:32:36<19:23:22, 3.42it/s] 36%|███▌ | 132658/371472 [10:32:36<19:01:22, 3.49it/s] 36%|███▌ | 132659/371472 [10:32:36<18:21:20, 3.61it/s] 36%|███▌ | 132660/371472 [10:32:36<17:49:05, 3.72it/s] {'loss': 3.1589, 'learning_rate': 6.789038764032839e-07, 'epoch': 5.71} + 36%|███▌ | 132660/371472 [10:32:36<17:49:05, 3.72it/s] 36%|███▌ | 132661/371472 [10:32:37<17:16:56, 3.84it/s] 36%|███▌ | 132662/371472 [10:32:37<17:12:18, 3.86it/s] 36%|███▌ | 132663/371472 [10:32:37<17:48:15, 3.73it/s] 36%|███▌ | 132664/371472 [10:32:37<17:47:57, 3.73it/s] 36%|███▌ | 132665/371472 [10:32:38<17:33:03, 3.78it/s] 36%|███▌ | 132666/371472 [10:32:38<17:19:37, 3.83it/s] 36%|███▌ | 132667/371472 [10:32:38<17:18:19, 3.83it/s] 36%|███▌ | 132668/371472 [10:32:38<17:29:00, 3.79it/s] 36%|███▌ | 132669/371472 [10:32:39<18:38:26, 3.56it/s] 36%|███▌ | 132670/371472 [10:32:39<18:55:57, 3.50it/s] 36%|███▌ | 132671/371472 [10:32:39<18:57:54, 3.50it/s] 36%|███▌ | 132672/371472 [10:32:40<18:16:00, 3.63it/s] 36%|███▌ | 132673/371472 [10:32:40<18:10:10, 3.65it/s] 36%|███▌ | 132674/371472 [10:32:40<19:16:51, 3.44it/s] 36%|███▌ | 132675/371472 [10:32:41<20:12:18, 3.28it/s] 36%|███▌ | 132676/371472 [10:32:41<19:20:46, 3.43it/s] 36%|███▌ | 132677/371472 [10:32:41<18:50:03, 3.52it/s] 36%|███▌ | 132678/371472 [10:32:41<18:19:24, 3.62it/s] 36%|███▌ | 132679/371472 [10:32:42<18:45:03, 3.54it/s] 36%|███▌ | 132680/371472 [10:32:42<18:40:33, 3.55it/s] {'loss': 3.328, 'learning_rate': 6.78855394427805e-07, 'epoch': 5.71} + 36%|███▌ | 132680/371472 [10:32:42<18:40:33, 3.55it/s] 36%|███▌ | 132681/371472 [10:32:42<18:09:32, 3.65it/s] 36%|███▌ | 132682/371472 [10:32:42<18:23:27, 3.61it/s] 36%|███▌ | 132683/371472 [10:32:43<18:41:33, 3.55it/s] 36%|███▌ | 132684/371472 [10:32:43<18:20:56, 3.61it/s] 36%|███▌ | 132685/371472 [10:32:43<18:35:52, 3.57it/s] 36%|███▌ | 132686/371472 [10:32:44<18:45:06, 3.54it/s] 36%|███▌ | 132687/371472 [10:32:44<18:27:15, 3.59it/s] 36%|███▌ | 132688/371472 [10:32:44<18:39:14, 3.56it/s] 36%|███▌ | 132689/371472 [10:32:44<19:16:59, 3.44it/s] 36%|███▌ | 132690/371472 [10:32:45<18:41:54, 3.55it/s] 36%|███▌ | 132691/371472 [10:32:45<18:31:18, 3.58it/s] 36%|███▌ | 132692/371472 [10:32:45<19:02:17, 3.48it/s] 36%|███▌ | 132693/371472 [10:32:46<18:44:15, 3.54it/s] 36%|███▌ | 132694/371472 [10:32:46<18:47:46, 3.53it/s] 36%|███▌ | 132695/371472 [10:32:46<19:30:19, 3.40it/s] 36%|███▌ | 132696/371472 [10:32:46<19:29:30, 3.40it/s] 36%|███▌ | 132697/371472 [10:32:47<19:18:22, 3.44it/s] 36%|███▌ | 132698/371472 [10:32:47<19:21:38, 3.43it/s] 36%|███▌ | 132699/371472 [10:32:47<19:04:40, 3.48it/s] 36%|███▌ | 132700/371472 [10:32:48<18:01:29, 3.68it/s] {'loss': 3.1003, 'learning_rate': 6.78806912452326e-07, 'epoch': 5.72} + 36%|███▌ | 132700/371472 [10:32:48<18:01:29, 3.68it/s] 36%|███▌ | 132701/371472 [10:32:48<17:45:58, 3.73it/s] 36%|███▌ | 132702/371472 [10:32:48<17:59:59, 3.68it/s] 36%|███▌ | 132703/371472 [10:32:48<18:06:43, 3.66it/s] 36%|███▌ | 132704/371472 [10:32:49<17:36:00, 3.77it/s] 36%|███▌ | 132705/371472 [10:32:49<17:31:17, 3.79it/s] 36%|███▌ | 132706/371472 [10:32:49<18:48:40, 3.53it/s] 36%|███▌ | 132707/371472 [10:32:50<20:13:29, 3.28it/s] 36%|███▌ | 132708/371472 [10:32:50<19:14:42, 3.45it/s] 36%|███▌ | 132709/371472 [10:32:50<19:20:17, 3.43it/s] 36%|███▌ | 132710/371472 [10:32:50<19:33:48, 3.39it/s] 36%|███▌ | 132711/371472 [10:32:51<20:14:36, 3.28it/s] 36%|███▌ | 132712/371472 [10:32:51<20:10:45, 3.29it/s] 36%|███▌ | 132713/371472 [10:32:51<19:18:12, 3.44it/s] 36%|███▌ | 132714/371472 [10:32:52<18:10:58, 3.65it/s] 36%|███▌ | 132715/371472 [10:32:52<17:40:56, 3.75it/s] 36%|███▌ | 132716/371472 [10:32:52<17:27:04, 3.80it/s] 36%|███▌ | 132717/371472 [10:32:52<17:03:53, 3.89it/s] 36%|███▌ | 132718/371472 [10:32:53<17:41:06, 3.75it/s] 36%|███▌ | 132719/371472 [10:32:53<18:40:04, 3.55it/s] 36%|███▌ | 132720/371472 [10:32:53<19:03:11, 3.48it/s] {'loss': 2.895, 'learning_rate': 6.787584304768471e-07, 'epoch': 5.72} + 36%|███▌ | 132720/371472 [10:32:53<19:03:11, 3.48it/s] 36%|███▌ | 132721/371472 [10:32:54<19:59:39, 3.32it/s] 36%|███▌ | 132722/371472 [10:32:54<18:56:44, 3.50it/s] 36%|███▌ | 132723/371472 [10:32:54<18:24:01, 3.60it/s] 36%|███▌ | 132724/371472 [10:32:54<17:54:22, 3.70it/s] 36%|███▌ | 132725/371472 [10:32:55<18:05:27, 3.67it/s] 36%|███▌ | 132726/371472 [10:32:55<17:50:40, 3.72it/s] 36%|███▌ | 132727/371472 [10:32:55<17:22:40, 3.82it/s] 36%|███▌ | 132728/371472 [10:32:55<17:29:43, 3.79it/s] 36%|███▌ | 132729/371472 [10:32:56<17:50:58, 3.72it/s] 36%|███▌ | 132730/371472 [10:32:56<18:17:24, 3.63it/s] 36%|███▌ | 132731/371472 [10:32:56<17:31:25, 3.78it/s] 36%|███▌ | 132732/371472 [10:32:56<17:33:19, 3.78it/s] 36%|███▌ | 132733/371472 [10:32:57<18:35:48, 3.57it/s] 36%|███▌ | 132734/371472 [10:32:57<18:52:26, 3.51it/s] 36%|███▌ | 132735/371472 [10:32:57<18:38:56, 3.56it/s] 36%|███▌ | 132736/371472 [10:32:58<18:35:09, 3.57it/s] 36%|███▌ | 132737/371472 [10:32:58<20:09:25, 3.29it/s] 36%|███▌ | 132738/371472 [10:32:58<19:39:03, 3.37it/s] 36%|███▌ | 132739/371472 [10:32:59<19:18:48, 3.43it/s] 36%|███▌ | 132740/371472 [10:32:59<18:54:52, 3.51it/s] {'loss': 3.2938, 'learning_rate': 6.787099485013683e-07, 'epoch': 5.72} + 36%|███▌ | 132740/371472 [10:32:59<18:54:52, 3.51it/s] 36%|███▌ | 132741/371472 [10:32:59<18:01:02, 3.68it/s] 36%|███▌ | 132742/371472 [10:32:59<18:00:27, 3.68it/s] 36%|███▌ | 132743/371472 [10:33:00<17:56:40, 3.70it/s] 36%|███▌ | 132744/371472 [10:33:00<17:59:50, 3.68it/s] 36%|███▌ | 132745/371472 [10:33:00<18:06:37, 3.66it/s] 36%|███▌ | 132746/371472 [10:33:00<18:22:20, 3.61it/s] 36%|███▌ | 132747/371472 [10:33:01<18:31:19, 3.58it/s] 36%|███▌ | 132748/371472 [10:33:01<18:21:41, 3.61it/s] 36%|███▌ | 132749/371472 [10:33:01<17:38:07, 3.76it/s] 36%|███▌ | 132750/371472 [10:33:01<17:37:24, 3.76it/s] 36%|███▌ | 132751/371472 [10:33:02<17:42:40, 3.74it/s] 36%|███▌ | 132752/371472 [10:33:02<17:13:38, 3.85it/s] 36%|███▌ | 132753/371472 [10:33:02<17:10:42, 3.86it/s] 36%|███▌ | 132754/371472 [10:33:02<17:09:34, 3.86it/s] 36%|███▌ | 132755/371472 [10:33:03<16:52:47, 3.93it/s] 36%|███▌ | 132756/371472 [10:33:03<16:32:10, 4.01it/s] 36%|███▌ | 132757/371472 [10:33:03<17:23:13, 3.81it/s] 36%|███▌ | 132758/371472 [10:33:04<17:20:00, 3.83it/s] 36%|███▌ | 132759/371472 [10:33:04<17:13:44, 3.85it/s] 36%|███▌ | 132760/371472 [10:33:04<17:15:43, 3.84it/s] {'loss': 3.1938, 'learning_rate': 6.786614665258893e-07, 'epoch': 5.72} + 36%|███▌ | 132760/371472 [10:33:04<17:15:43, 3.84it/s] 36%|███▌ | 132761/371472 [10:33:04<17:58:34, 3.69it/s] 36%|███▌ | 132762/371472 [10:33:05<18:01:34, 3.68it/s] 36%|███▌ | 132763/371472 [10:33:05<17:58:47, 3.69it/s] 36%|███▌ | 132764/371472 [10:33:05<17:31:48, 3.78it/s] 36%|███▌ | 132765/371472 [10:33:05<17:12:34, 3.85it/s] 36%|███▌ | 132766/371472 [10:33:06<17:29:16, 3.79it/s] 36%|███▌ | 132767/371472 [10:33:06<17:44:01, 3.74it/s] 36%|███▌ | 132768/371472 [10:33:06<18:02:48, 3.67it/s] 36%|███▌ | 132769/371472 [10:33:06<17:44:31, 3.74it/s] 36%|███▌ | 132770/371472 [10:33:07<18:05:12, 3.67it/s] 36%|███▌ | 132771/371472 [10:33:07<18:20:39, 3.61it/s] 36%|███▌ | 132772/371472 [10:33:07<17:48:49, 3.72it/s] 36%|███▌ | 132773/371472 [10:33:08<19:09:20, 3.46it/s] 36%|███▌ | 132774/371472 [10:33:08<19:52:48, 3.34it/s] 36%|███▌ | 132775/371472 [10:33:08<19:10:21, 3.46it/s] 36%|███▌ | 132776/371472 [10:33:08<18:42:47, 3.54it/s] 36%|███▌ | 132777/371472 [10:33:09<18:48:06, 3.53it/s] 36%|███▌ | 132778/371472 [10:33:09<18:49:55, 3.52it/s] 36%|███▌ | 132779/371472 [10:33:09<18:34:31, 3.57it/s] 36%|███▌ | 132780/371472 [10:33:10<18:20:52, 3.61it/s] {'loss': 3.218, 'learning_rate': 6.786129845504104e-07, 'epoch': 5.72} + 36%|███▌ | 132780/371472 [10:33:10<18:20:52, 3.61it/s] 36%|███▌ | 132781/371472 [10:33:10<18:01:12, 3.68it/s] 36%|███▌ | 132782/371472 [10:33:10<19:54:23, 3.33it/s] 36%|███▌ | 132783/371472 [10:33:11<19:56:54, 3.32it/s] 36%|███▌ | 132784/371472 [10:33:11<18:56:24, 3.50it/s] 36%|███▌ | 132785/371472 [10:33:11<18:17:46, 3.62it/s] 36%|███▌ | 132786/371472 [10:33:11<17:56:38, 3.69it/s] 36%|███▌ | 132787/371472 [10:33:12<17:39:54, 3.75it/s] 36%|███▌ | 132788/371472 [10:33:12<18:03:24, 3.67it/s] 36%|███▌ | 132789/371472 [10:33:12<18:03:17, 3.67it/s] 36%|███▌ | 132790/371472 [10:33:12<19:12:30, 3.45it/s] 36%|███▌ | 132791/371472 [10:33:13<18:17:38, 3.62it/s] 36%|███▌ | 132792/371472 [10:33:13<17:36:34, 3.76it/s] 36%|███▌ | 132793/371472 [10:33:13<17:26:58, 3.80it/s] 36%|███▌ | 132794/371472 [10:33:13<17:39:02, 3.76it/s] 36%|███▌ | 132795/371472 [10:33:14<18:23:39, 3.60it/s] 36%|███▌ | 132796/371472 [10:33:14<17:43:28, 3.74it/s] 36%|███▌ | 132797/371472 [10:33:14<18:51:02, 3.52it/s] 36%|███▌ | 132798/371472 [10:33:15<20:23:50, 3.25it/s] 36%|███▌ | 132799/371472 [10:33:15<20:54:55, 3.17it/s] 36%|███▌ | 132800/371472 [10:33:15<20:28:28, 3.24it/s] {'loss': 3.2835, 'learning_rate': 6.785645025749316e-07, 'epoch': 5.72} + 36%|███▌ | 132800/371472 [10:33:15<20:28:28, 3.24it/s] 36%|███▌ | 132801/371472 [10:33:16<20:26:01, 3.24it/s] 36%|███▌ | 132802/371472 [10:33:16<20:44:04, 3.20it/s] 36%|███▌ | 132803/371472 [10:33:16<19:48:10, 3.35it/s] 36%|███▌ | 132804/371472 [10:33:17<20:19:07, 3.26it/s] 36%|███▌ | 132805/371472 [10:33:17<20:35:57, 3.22it/s] 36%|███▌ | 132806/371472 [10:33:17<20:31:10, 3.23it/s] 36%|███▌ | 132807/371472 [10:33:17<19:33:16, 3.39it/s] 36%|███▌ | 132808/371472 [10:33:18<18:41:14, 3.55it/s] 36%|███▌ | 132809/371472 [10:33:18<18:40:12, 3.55it/s] 36%|███▌ | 132810/371472 [10:33:18<18:22:53, 3.61it/s] 36%|███▌ | 132811/371472 [10:33:18<18:21:15, 3.61it/s] 36%|███▌ | 132812/371472 [10:33:19<18:25:06, 3.60it/s] 36%|███▌ | 132813/371472 [10:33:19<18:08:54, 3.65it/s] 36%|███▌ | 132814/371472 [10:33:19<18:21:38, 3.61it/s] 36%|███▌ | 132815/371472 [10:33:20<18:17:52, 3.62it/s] 36%|███▌ | 132816/371472 [10:33:20<18:43:30, 3.54it/s] 36%|███▌ | 132817/371472 [10:33:20<17:44:55, 3.74it/s] 36%|███▌ | 132818/371472 [10:33:20<17:24:58, 3.81it/s] 36%|███▌ | 132819/371472 [10:33:21<17:23:34, 3.81it/s] 36%|███▌ | 132820/371472 [10:33:21<18:30:24, 3.58it/s] {'loss': 3.1632, 'learning_rate': 6.785160205994528e-07, 'epoch': 5.72} + 36%|███▌ | 132820/371472 [10:33:21<18:30:24, 3.58it/s] 36%|███▌ | 132821/371472 [10:33:21<18:04:13, 3.67it/s] 36%|███▌ | 132822/371472 [10:33:21<17:54:42, 3.70it/s] 36%|███▌ | 132823/371472 [10:33:22<18:21:12, 3.61it/s] 36%|███▌ | 132824/371472 [10:33:22<18:10:36, 3.65it/s] 36%|███▌ | 132825/371472 [10:33:22<17:44:39, 3.74it/s] 36%|███▌ | 132826/371472 [10:33:23<17:48:40, 3.72it/s] 36%|███▌ | 132827/371472 [10:33:23<17:13:26, 3.85it/s] 36%|███▌ | 132828/371472 [10:33:23<17:11:23, 3.86it/s] 36%|███▌ | 132829/371472 [10:33:23<18:53:08, 3.51it/s] 36%|███▌ | 132830/371472 [10:33:24<18:47:51, 3.53it/s] 36%|███▌ | 132831/371472 [10:33:24<17:47:37, 3.73it/s] 36%|███▌ | 132832/371472 [10:33:24<18:34:41, 3.57it/s] 36%|███▌ | 132833/371472 [10:33:25<20:04:00, 3.30it/s] 36%|███▌ | 132834/371472 [10:33:25<18:48:39, 3.52it/s] 36%|███▌ | 132835/371472 [10:33:25<20:29:17, 3.24it/s] 36%|███▌ | 132836/371472 [10:33:25<19:59:11, 3.32it/s] 36%|███▌ | 132837/371472 [10:33:26<18:49:30, 3.52it/s] 36%|███▌ | 132838/371472 [10:33:26<18:42:14, 3.54it/s] 36%|███▌ | 132839/371472 [10:33:26<18:57:16, 3.50it/s] 36%|███▌ | 132840/371472 [10:33:27<19:14:54, 3.44it/s] {'loss': 3.2434, 'learning_rate': 6.784675386239737e-07, 'epoch': 5.72} + 36%|███▌ | 132840/371472 [10:33:27<19:14:54, 3.44it/s] 36%|███▌ | 132841/371472 [10:33:27<18:58:45, 3.49it/s] 36%|███▌ | 132842/371472 [10:33:27<18:54:53, 3.50it/s] 36%|███▌ | 132843/371472 [10:33:27<18:57:49, 3.50it/s] 36%|███▌ | 132844/371472 [10:33:28<19:03:39, 3.48it/s] 36%|███▌ | 132845/371472 [10:33:28<18:30:38, 3.58it/s] 36%|███▌ | 132846/371472 [10:33:28<18:38:50, 3.55it/s] 36%|███▌ | 132847/371472 [10:33:29<18:46:54, 3.53it/s] 36%|███▌ | 132848/371472 [10:33:29<18:06:08, 3.66it/s] 36%|███▌ | 132849/371472 [10:33:29<18:24:01, 3.60it/s] 36%|███▌ | 132850/371472 [10:33:29<18:12:39, 3.64it/s] 36%|███▌ | 132851/371472 [10:33:30<17:24:35, 3.81it/s] 36%|███▌ | 132852/371472 [10:33:30<17:06:48, 3.87it/s] 36%|███▌ | 132853/371472 [10:33:30<16:41:26, 3.97it/s] 36%|███▌ | 132854/371472 [10:33:30<17:43:39, 3.74it/s] 36%|███▌ | 132855/371472 [10:33:31<17:39:05, 3.76it/s] 36%|███▌ | 132856/371472 [10:33:31<17:28:37, 3.79it/s] 36%|███▌ | 132857/371472 [10:33:31<18:20:14, 3.61it/s] 36%|███▌ | 132858/371472 [10:33:31<17:55:32, 3.70it/s] 36%|███▌ | 132859/371472 [10:33:32<17:52:20, 3.71it/s] 36%|███▌ | 132860/371472 [10:33:32<18:26:51, 3.59it/s] {'loss': 3.2058, 'learning_rate': 6.784190566484949e-07, 'epoch': 5.72} + 36%|███▌ | 132860/371472 [10:33:32<18:26:51, 3.59it/s] 36%|███▌ | 132861/371472 [10:33:32<18:07:41, 3.66it/s] 36%|███▌ | 132862/371472 [10:33:33<21:19:41, 3.11it/s] 36%|███▌ | 132863/371472 [10:33:33<19:50:42, 3.34it/s] 36%|███▌ | 132864/371472 [10:33:33<18:52:44, 3.51it/s] 36%|███▌ | 132865/371472 [10:33:33<18:16:07, 3.63it/s] 36%|███▌ | 132866/371472 [10:33:34<18:09:52, 3.65it/s] 36%|███▌ | 132867/371472 [10:33:34<18:16:06, 3.63it/s] 36%|███▌ | 132868/371472 [10:33:34<18:00:08, 3.68it/s] 36%|███▌ | 132869/371472 [10:33:35<18:01:28, 3.68it/s] 36%|███▌ | 132870/371472 [10:33:35<20:30:35, 3.23it/s] 36%|███▌ | 132871/371472 [10:33:35<20:09:46, 3.29it/s] 36%|███▌ | 132872/371472 [10:33:35<18:43:14, 3.54it/s] 36%|███▌ | 132873/371472 [10:33:36<19:14:14, 3.45it/s] 36%|███▌ | 132874/371472 [10:33:36<18:12:47, 3.64it/s] 36%|███▌ | 132875/371472 [10:33:36<18:08:11, 3.65it/s] 36%|███▌ | 132876/371472 [10:33:37<17:10:11, 3.86it/s] 36%|███▌ | 132877/371472 [10:33:37<17:14:57, 3.84it/s] 36%|███▌ | 132878/371472 [10:33:37<17:36:47, 3.76it/s] 36%|███▌ | 132879/371472 [10:33:37<19:14:09, 3.45it/s] 36%|███▌ | 132880/371472 [10:33:38<19:48:06, 3.35it/s] {'loss': 3.282, 'learning_rate': 6.78370574673016e-07, 'epoch': 5.72} + 36%|███▌ | 132880/371472 [10:33:38<19:48:06, 3.35it/s] 36%|███▌ | 132881/371472 [10:33:38<19:08:18, 3.46it/s] 36%|███▌ | 132882/371472 [10:33:38<18:42:46, 3.54it/s] 36%|███▌ | 132883/371472 [10:33:39<18:30:45, 3.58it/s] 36%|███▌ | 132884/371472 [10:33:39<17:58:01, 3.69it/s] 36%|███▌ | 132885/371472 [10:33:39<18:51:08, 3.52it/s] 36%|███▌ | 132886/371472 [10:33:39<18:30:37, 3.58it/s] 36%|███▌ | 132887/371472 [10:33:40<21:01:55, 3.15it/s] 36%|███▌ | 132888/371472 [10:33:40<19:35:45, 3.38it/s] 36%|███▌ | 132889/371472 [10:33:40<19:09:02, 3.46it/s] 36%|███▌ | 132890/371472 [10:33:41<18:15:58, 3.63it/s] 36%|███▌ | 132891/371472 [10:33:41<17:49:08, 3.72it/s] 36%|███▌ | 132892/371472 [10:33:41<17:54:50, 3.70it/s] 36%|███▌ | 132893/371472 [10:33:41<17:42:50, 3.74it/s] 36%|███▌ | 132894/371472 [10:33:42<18:23:19, 3.60it/s] 36%|███▌ | 132895/371472 [10:33:42<17:59:53, 3.68it/s] 36%|███▌ | 132896/371472 [10:33:42<18:00:05, 3.68it/s] 36%|███▌ | 132897/371472 [10:33:42<18:25:14, 3.60it/s] 36%|███▌ | 132898/371472 [10:33:43<18:09:19, 3.65it/s] 36%|███▌ | 132899/371472 [10:33:43<17:37:36, 3.76it/s] 36%|███▌ | 132900/371472 [10:33:43<18:07:44, 3.66it/s] {'loss': 3.0866, 'learning_rate': 6.783220926975372e-07, 'epoch': 5.72} + 36%|███▌ | 132900/371472 [10:33:43<18:07:44, 3.66it/s] 36%|███▌ | 132901/371472 [10:33:44<17:58:43, 3.69it/s] 36%|███▌ | 132902/371472 [10:33:44<17:33:05, 3.78it/s] 36%|███▌ | 132903/371472 [10:33:44<17:21:04, 3.82it/s] 36%|███▌ | 132904/371472 [10:33:44<19:05:02, 3.47it/s] 36%|███▌ | 132905/371472 [10:33:45<18:29:41, 3.58it/s] 36%|███▌ | 132906/371472 [10:33:45<18:02:14, 3.67it/s] 36%|███▌ | 132907/371472 [10:33:45<19:36:11, 3.38it/s] 36%|███▌ | 132908/371472 [10:33:46<20:36:58, 3.21it/s] 36%|███▌ | 132909/371472 [10:33:46<19:33:23, 3.39it/s] 36%|███▌ | 132910/371472 [10:33:46<19:30:51, 3.40it/s] 36%|███▌ | 132911/371472 [10:33:46<18:46:21, 3.53it/s] 36%|███▌ | 132912/371472 [10:33:47<17:57:34, 3.69it/s] 36%|███▌ | 132913/371472 [10:33:47<17:31:15, 3.78it/s] 36%|███▌ | 132914/371472 [10:33:47<17:47:56, 3.72it/s] 36%|███▌ | 132915/371472 [10:33:47<18:54:19, 3.51it/s] 36%|███▌ | 132916/371472 [10:33:48<18:25:21, 3.60it/s] 36%|███▌ | 132917/371472 [10:33:48<18:23:50, 3.60it/s] 36%|███▌ | 132918/371472 [10:33:48<19:24:53, 3.41it/s] 36%|███▌ | 132919/371472 [10:33:49<19:55:20, 3.33it/s] 36%|███▌ | 132920/371472 [10:33:49<19:31:55, 3.39it/s] {'loss': 3.1784, 'learning_rate': 6.782736107220582e-07, 'epoch': 5.73} + 36%|███▌ | 132920/371472 [10:33:49<19:31:55, 3.39it/s] 36%|███▌ | 132921/371472 [10:33:49<21:06:54, 3.14it/s] 36%|███▌ | 132922/371472 [10:33:50<20:38:03, 3.21it/s] 36%|███▌ | 132923/371472 [10:33:50<21:13:02, 3.12it/s] 36%|███▌ | 132924/371472 [10:33:50<20:12:08, 3.28it/s] 36%|███▌ | 132925/371472 [10:33:51<20:27:31, 3.24it/s] 36%|███▌ | 132926/371472 [10:33:51<20:24:18, 3.25it/s] 36%|███▌ | 132927/371472 [10:33:51<19:50:45, 3.34it/s] 36%|███▌ | 132928/371472 [10:33:51<19:30:46, 3.40it/s] 36%|███▌ | 132929/371472 [10:33:52<20:09:24, 3.29it/s] 36%|███▌ | 132930/371472 [10:33:52<19:46:31, 3.35it/s] 36%|███▌ | 132931/371472 [10:33:52<19:04:40, 3.47it/s] 36%|███▌ | 132932/371472 [10:33:53<20:12:47, 3.28it/s] 36%|███▌ | 132933/371472 [10:33:53<19:42:27, 3.36it/s] 36%|███▌ | 132934/371472 [10:33:53<18:44:05, 3.54it/s] 36%|███▌ | 132935/371472 [10:33:53<18:58:15, 3.49it/s] 36%|███▌ | 132936/371472 [10:33:54<18:10:56, 3.64it/s] 36%|███▌ | 132937/371472 [10:33:54<18:26:47, 3.59it/s] 36%|███▌ | 132938/371472 [10:33:54<18:44:31, 3.54it/s] 36%|███▌ | 132939/371472 [10:33:55<19:37:59, 3.37it/s] 36%|███▌ | 132940/371472 [10:33:55<19:13:47, 3.45it/s] {'loss': 3.1761, 'learning_rate': 6.782251287465793e-07, 'epoch': 5.73} + 36%|███▌ | 132940/371472 [10:33:55<19:13:47, 3.45it/s] 36%|███▌ | 132941/371472 [10:33:55<19:34:14, 3.39it/s] 36%|███▌ | 132942/371472 [10:33:56<20:13:29, 3.28it/s] 36%|███▌ | 132943/371472 [10:33:56<21:23:10, 3.10it/s] 36%|███▌ | 132944/371472 [10:33:56<20:12:54, 3.28it/s] 36%|███▌ | 132945/371472 [10:33:56<19:10:42, 3.45it/s] 36%|███▌ | 132946/371472 [10:33:57<19:16:30, 3.44it/s] 36%|███▌ | 132947/371472 [10:33:57<18:35:29, 3.56it/s] 36%|███▌ | 132948/371472 [10:33:57<18:14:13, 3.63it/s] 36%|███▌ | 132949/371472 [10:33:58<19:34:12, 3.39it/s] 36%|███▌ | 132950/371472 [10:33:58<18:37:52, 3.56it/s] 36%|███▌ | 132951/371472 [10:33:58<17:59:32, 3.68it/s] 36%|███▌ | 132952/371472 [10:33:58<18:41:04, 3.55it/s] 36%|███▌ | 132953/371472 [10:33:59<17:45:01, 3.73it/s] 36%|███▌ | 132954/371472 [10:33:59<18:38:37, 3.55it/s] 36%|███▌ | 132955/371472 [10:33:59<18:41:15, 3.55it/s] 36%|███▌ | 132956/371472 [10:34:00<19:41:31, 3.36it/s] 36%|███▌ | 132957/371472 [10:34:00<19:02:43, 3.48it/s] 36%|███▌ | 132958/371472 [10:34:00<18:59:06, 3.49it/s] 36%|███▌ | 132959/371472 [10:34:00<20:53:26, 3.17it/s] 36%|███▌ | 132960/371472 [10:34:01<19:50:29, 3.34it/s] {'loss': 3.0853, 'learning_rate': 6.781766467711004e-07, 'epoch': 5.73} + 36%|███▌ | 132960/371472 [10:34:01<19:50:29, 3.34it/s] 36%|███▌ | 132961/371472 [10:34:01<19:09:55, 3.46it/s] 36%|███▌ | 132962/371472 [10:34:01<19:39:40, 3.37it/s] 36%|███▌ | 132963/371472 [10:34:02<19:24:18, 3.41it/s] 36%|███▌ | 132964/371472 [10:34:02<19:33:34, 3.39it/s] 36%|███▌ | 132965/371472 [10:34:02<19:56:46, 3.32it/s] 36%|███▌ | 132966/371472 [10:34:02<19:18:52, 3.43it/s] 36%|███▌ | 132967/371472 [10:34:03<19:37:46, 3.38it/s] 36%|███▌ | 132968/371472 [10:34:03<18:51:18, 3.51it/s] 36%|███▌ | 132969/371472 [10:34:03<18:16:14, 3.63it/s] 36%|███▌ | 132970/371472 [10:34:04<18:03:11, 3.67it/s] 36%|███▌ | 132971/371472 [10:34:04<18:03:47, 3.67it/s] 36%|███▌ | 132972/371472 [10:34:04<18:56:18, 3.50it/s] 36%|███▌ | 132973/371472 [10:34:04<18:05:14, 3.66it/s] 36%|███▌ | 132974/371472 [10:34:05<19:00:44, 3.48it/s] 36%|███▌ | 132975/371472 [10:34:05<18:26:45, 3.59it/s] 36%|███▌ | 132976/371472 [10:34:05<18:39:57, 3.55it/s] 36%|███▌ | 132977/371472 [10:34:06<18:13:25, 3.64it/s] 36%|███▌ | 132978/371472 [10:34:06<17:17:58, 3.83it/s] 36%|███▌ | 132979/371472 [10:34:06<17:30:57, 3.78it/s] 36%|███▌ | 132980/371472 [10:34:06<17:55:34, 3.70it/s] {'loss': 3.0961, 'learning_rate': 6.781281647956215e-07, 'epoch': 5.73} + 36%|███▌ | 132980/371472 [10:34:06<17:55:34, 3.70it/s] 36%|███▌ | 132981/371472 [10:34:07<18:00:08, 3.68it/s] 36%|███▌ | 132982/371472 [10:34:07<18:18:45, 3.62it/s] 36%|███▌ | 132983/371472 [10:34:07<17:39:43, 3.75it/s] 36%|███▌ | 132984/371472 [10:34:07<17:17:02, 3.83it/s] 36%|███▌ | 132985/371472 [10:34:08<17:25:30, 3.80it/s] 36%|███▌ | 132986/371472 [10:34:08<17:39:24, 3.75it/s] 36%|███▌ | 132987/371472 [10:34:08<18:53:24, 3.51it/s] 36%|███▌ | 132988/371472 [10:34:09<18:47:49, 3.52it/s] 36%|███▌ | 132989/371472 [10:34:09<18:38:16, 3.55it/s] 36%|███▌ | 132990/371472 [10:34:09<18:12:43, 3.64it/s] 36%|███▌ | 132991/371472 [10:34:09<17:44:09, 3.74it/s] 36%|███▌ | 132992/371472 [10:34:10<17:51:44, 3.71it/s] 36%|███▌ | 132993/371472 [10:34:10<18:03:53, 3.67it/s] 36%|███▌ | 132994/371472 [10:34:10<18:07:13, 3.66it/s] 36%|███▌ | 132995/371472 [10:34:10<17:31:17, 3.78it/s] 36%|███▌ | 132996/371472 [10:34:11<17:00:22, 3.90it/s] 36%|███▌ | 132997/371472 [10:34:11<16:43:23, 3.96it/s] 36%|███▌ | 132998/371472 [10:34:11<16:56:43, 3.91it/s] 36%|███▌ | 132999/371472 [10:34:11<17:11:58, 3.85it/s] 36%|███▌ | 133000/371472 [10:34:12<17:31:22, 3.78it/s] {'loss': 3.1643, 'learning_rate': 6.780796828201425e-07, 'epoch': 5.73} + 36%|███▌ | 133000/371472 [10:34:12<17:31:22, 3.78it/s] 36%|███▌ | 133001/371472 [10:34:12<17:13:42, 3.84it/s] 36%|███▌ | 133002/371472 [10:34:12<16:59:24, 3.90it/s] 36%|███▌ | 133003/371472 [10:34:13<18:38:59, 3.55it/s] 36%|███▌ | 133004/371472 [10:34:13<18:40:03, 3.55it/s] 36%|███▌ | 133005/371472 [10:34:13<18:05:19, 3.66it/s] 36%|███▌ | 133006/371472 [10:34:13<18:35:34, 3.56it/s] 36%|███▌ | 133007/371472 [10:34:14<20:13:29, 3.28it/s] 36%|███▌ | 133008/371472 [10:34:14<20:14:05, 3.27it/s] 36%|███▌ | 133009/371472 [10:34:14<19:45:36, 3.35it/s] 36%|███▌ | 133010/371472 [10:34:15<19:48:58, 3.34it/s] 36%|███▌ | 133011/371472 [10:34:15<19:05:38, 3.47it/s] 36%|███▌ | 133012/371472 [10:34:15<18:26:07, 3.59it/s] 36%|███▌ | 133013/371472 [10:34:15<20:15:27, 3.27it/s] 36%|███▌ | 133014/371472 [10:34:16<19:48:21, 3.34it/s] 36%|███▌ | 133015/371472 [10:34:16<19:05:32, 3.47it/s] 36%|███▌ | 133016/371472 [10:34:16<19:09:05, 3.46it/s] 36%|███▌ | 133017/371472 [10:34:17<19:17:01, 3.43it/s] 36%|███▌ | 133018/371472 [10:34:17<19:02:45, 3.48it/s] 36%|███▌ | 133019/371472 [10:34:17<18:48:06, 3.52it/s] 36%|███▌ | 133020/371472 [10:34:17<19:06:33, 3.47it/s] {'loss': 3.1659, 'learning_rate': 6.780312008446637e-07, 'epoch': 5.73} + 36%|███▌ | 133020/371472 [10:34:17<19:06:33, 3.47it/s] 36%|███▌ | 133021/371472 [10:34:18<19:18:31, 3.43it/s] 36%|███▌ | 133022/371472 [10:34:18<18:47:36, 3.52it/s] 36%|███▌ | 133023/371472 [10:34:18<18:44:59, 3.53it/s] 36%|███▌ | 133024/371472 [10:34:19<18:21:07, 3.61it/s] 36%|███▌ | 133025/371472 [10:34:19<17:46:02, 3.73it/s] 36%|███▌ | 133026/371472 [10:34:19<18:15:19, 3.63it/s] 36%|███▌ | 133027/371472 [10:34:19<18:03:54, 3.67it/s] 36%|███▌ | 133028/371472 [10:34:20<18:26:10, 3.59it/s] 36%|███▌ | 133029/371472 [10:34:20<18:23:04, 3.60it/s] 36%|███▌ | 133030/371472 [10:34:20<18:13:56, 3.63it/s] 36%|███▌ | 133031/371472 [10:34:21<18:54:02, 3.50it/s] 36%|███▌ | 133032/371472 [10:34:21<17:55:01, 3.70it/s] 36%|███▌ | 133033/371472 [10:34:21<18:16:43, 3.62it/s] 36%|███▌ | 133034/371472 [10:34:21<19:34:29, 3.38it/s] 36%|███▌ | 133035/371472 [10:34:22<19:03:14, 3.48it/s] 36%|███▌ | 133036/371472 [10:34:22<20:51:48, 3.17it/s] 36%|███▌ | 133037/371472 [10:34:22<20:16:54, 3.27it/s] 36%|███▌ | 133038/371472 [10:34:23<19:17:16, 3.43it/s] 36%|███▌ | 133039/371472 [10:34:23<18:43:45, 3.54it/s] 36%|███▌ | 133040/371472 [10:34:23<19:01:13, 3.48it/s] {'loss': 3.1038, 'learning_rate': 6.779827188691849e-07, 'epoch': 5.73} + 36%|███▌ | 133040/371472 [10:34:23<19:01:13, 3.48it/s] 36%|███▌ | 133041/371472 [10:34:23<18:22:33, 3.60it/s] 36%|███▌ | 133042/371472 [10:34:24<18:12:52, 3.64it/s] 36%|███▌ | 133043/371472 [10:34:24<17:31:56, 3.78it/s] 36%|███▌ | 133044/371472 [10:34:24<17:27:31, 3.79it/s] 36%|███▌ | 133045/371472 [10:34:24<16:49:04, 3.94it/s] 36%|███▌ | 133046/371472 [10:34:25<16:29:00, 4.02it/s] 36%|███▌ | 133047/371472 [10:34:25<17:53:59, 3.70it/s] 36%|███▌ | 133048/371472 [10:34:25<17:36:13, 3.76it/s] 36%|███▌ | 133049/371472 [10:34:25<17:35:09, 3.77it/s] 36%|███▌ | 133050/371472 [10:34:26<17:45:39, 3.73it/s] 36%|███▌ | 133051/371472 [10:34:26<17:52:35, 3.70it/s] 36%|███▌ | 133052/371472 [10:34:26<17:24:17, 3.81it/s] 36%|███▌ | 133053/371472 [10:34:27<17:00:49, 3.89it/s] 36%|███▌ | 133054/371472 [10:34:27<17:05:04, 3.88it/s] 36%|███▌ | 133055/371472 [10:34:27<17:03:52, 3.88it/s] 36%|███▌ | 133056/371472 [10:34:27<17:09:46, 3.86it/s] 36%|███▌ | 133057/371472 [10:34:28<17:18:01, 3.83it/s] 36%|███▌ | 133058/371472 [10:34:28<16:42:37, 3.96it/s] 36%|███▌ | 133059/371472 [10:34:28<21:08:11, 3.13it/s] 36%|███▌ | 133060/371472 [10:34:29<19:45:42, 3.35it/s] {'loss': 3.4505, 'learning_rate': 6.779342368937059e-07, 'epoch': 5.73} + 36%|███▌ | 133060/371472 [10:34:29<19:45:42, 3.35it/s] 36%|███▌ | 133061/371472 [10:34:29<19:43:11, 3.36it/s] 36%|███▌ | 133062/371472 [10:34:29<21:10:20, 3.13it/s] 36%|███▌ | 133063/371472 [10:34:29<19:32:20, 3.39it/s] 36%|███▌ | 133064/371472 [10:34:30<19:20:20, 3.42it/s] 36%|███▌ | 133065/371472 [10:34:30<18:47:26, 3.52it/s] 36%|███▌ | 133066/371472 [10:34:30<18:14:02, 3.63it/s] 36%|███▌ | 133067/371472 [10:34:31<19:32:10, 3.39it/s] 36%|███▌ | 133068/371472 [10:34:31<19:09:01, 3.46it/s] 36%|███▌ | 133069/371472 [10:34:31<19:41:06, 3.36it/s] 36%|███▌ | 133070/371472 [10:34:31<18:38:42, 3.55it/s] 36%|███▌ | 133071/371472 [10:34:32<19:10:34, 3.45it/s] 36%|███▌ | 133072/371472 [10:34:32<19:15:31, 3.44it/s] 36%|███▌ | 133073/371472 [10:34:32<18:42:00, 3.54it/s] 36%|███▌ | 133074/371472 [10:34:33<20:34:37, 3.22it/s] 36%|███▌ | 133075/371472 [10:34:33<20:08:26, 3.29it/s] 36%|███▌ | 133076/371472 [10:34:33<18:54:51, 3.50it/s] 36%|███▌ | 133077/371472 [10:34:33<18:11:36, 3.64it/s] 36%|███▌ | 133078/371472 [10:34:34<18:00:01, 3.68it/s] 36%|███▌ | 133079/371472 [10:34:34<17:22:07, 3.81it/s] 36%|███▌ | 133080/371472 [10:34:34<20:09:10, 3.29it/s] {'loss': 3.3092, 'learning_rate': 6.77885754918227e-07, 'epoch': 5.73} + 36%|███▌ | 133080/371472 [10:34:34<20:09:10, 3.29it/s] 36%|███▌ | 133081/371472 [10:34:35<19:48:00, 3.34it/s] 36%|███▌ | 133082/371472 [10:34:35<20:25:20, 3.24it/s] 36%|███▌ | 133083/371472 [10:34:35<19:05:36, 3.47it/s] 36%|███▌ | 133084/371472 [10:34:35<18:42:22, 3.54it/s] 36%|███▌ | 133085/371472 [10:34:36<19:26:54, 3.40it/s] 36%|███▌ | 133086/371472 [10:34:36<19:30:18, 3.39it/s] 36%|███▌ | 133087/371472 [10:34:36<18:29:15, 3.58it/s] 36%|███▌ | 133088/371472 [10:34:37<18:08:33, 3.65it/s] 36%|███▌ | 133089/371472 [10:34:37<18:29:02, 3.58it/s] 36%|███▌ | 133090/371472 [10:34:37<18:01:11, 3.67it/s] 36%|███▌ | 133091/371472 [10:34:37<18:21:20, 3.61it/s] 36%|███▌ | 133092/371472 [10:34:38<18:55:29, 3.50it/s] 36%|███▌ | 133093/371472 [10:34:38<19:26:26, 3.41it/s] 36%|███▌ | 133094/371472 [10:34:38<18:53:02, 3.51it/s] 36%|███▌ | 133095/371472 [10:34:39<18:13:58, 3.63it/s] 36%|███▌ | 133096/371472 [10:34:39<18:03:33, 3.67it/s] 36%|███▌ | 133097/371472 [10:34:39<18:01:16, 3.67it/s] 36%|███▌ | 133098/371472 [10:34:39<19:16:24, 3.44it/s] 36%|███▌ | 133099/371472 [10:34:40<18:39:54, 3.55it/s] 36%|███▌ | 133100/371472 [10:34:40<18:54:19, 3.50it/s] {'loss': 2.963, 'learning_rate': 6.778372729427481e-07, 'epoch': 5.73} + 36%|███▌ | 133100/371472 [10:34:40<18:54:19, 3.50it/s] 36%|███▌ | 133101/371472 [10:34:40<18:55:34, 3.50it/s] 36%|███▌ | 133102/371472 [10:34:41<18:51:40, 3.51it/s] 36%|███▌ | 133103/371472 [10:34:41<18:57:53, 3.49it/s] 36%|███▌ | 133104/371472 [10:34:41<19:06:30, 3.47it/s] 36%|███▌ | 133105/371472 [10:34:41<18:27:21, 3.59it/s] 36%|███▌ | 133106/371472 [10:34:42<18:01:12, 3.67it/s] 36%|███▌ | 133107/371472 [10:34:42<17:17:25, 3.83it/s] 36%|███▌ | 133108/371472 [10:34:42<16:59:43, 3.90it/s] 36%|███▌ | 133109/371472 [10:34:42<17:02:21, 3.89it/s] 36%|███▌ | 133110/371472 [10:34:43<17:57:08, 3.69it/s] 36%|███▌ | 133111/371472 [10:34:43<17:31:31, 3.78it/s] 36%|███▌ | 133112/371472 [10:34:43<17:15:26, 3.84it/s] 36%|███▌ | 133113/371472 [10:34:43<17:06:06, 3.87it/s] 36%|███▌ | 133114/371472 [10:34:44<16:54:34, 3.92it/s] 36%|███▌ | 133115/371472 [10:34:44<16:36:02, 3.99it/s] 36%|███▌ | 133116/371472 [10:34:44<16:44:55, 3.95it/s] 36%|███▌ | 133117/371472 [10:34:44<16:54:03, 3.92it/s] 36%|███▌ | 133118/371472 [10:34:45<16:42:02, 3.96it/s] 36%|███▌ | 133119/371472 [10:34:45<16:24:26, 4.04it/s] 36%|███▌ | 133120/371472 [10:34:45<17:18:46, 3.82it/s] {'loss': 3.2165, 'learning_rate': 6.777887909672693e-07, 'epoch': 5.73} + 36%|███▌ | 133120/371472 [10:34:45<17:18:46, 3.82it/s] 36%|███▌ | 133121/371472 [10:34:46<18:54:07, 3.50it/s] 36%|███▌ | 133122/371472 [10:34:46<18:56:02, 3.50it/s] 36%|███▌ | 133123/371472 [10:34:46<18:37:57, 3.55it/s] 36%|███▌ | 133124/371472 [10:34:46<19:47:02, 3.35it/s] 36%|███▌ | 133125/371472 [10:34:47<19:00:52, 3.48it/s] 36%|███▌ | 133126/371472 [10:34:47<18:26:57, 3.59it/s] 36%|███▌ | 133127/371472 [10:34:47<17:37:17, 3.76it/s] 36%|███▌ | 133128/371472 [10:34:47<17:26:41, 3.80it/s] 36%|███▌ | 133129/371472 [10:34:48<17:40:30, 3.75it/s] 36%|███▌ | 133130/371472 [10:34:48<18:13:53, 3.63it/s] 36%|███▌ | 133131/371472 [10:34:48<18:06:06, 3.66it/s] 36%|███▌ | 133132/371472 [10:34:49<17:50:17, 3.71it/s] 36%|███▌ | 133133/371472 [10:34:49<17:29:38, 3.78it/s] 36%|███▌ | 133134/371472 [10:34:49<17:24:29, 3.80it/s] 36%|███▌ | 133135/371472 [10:34:49<17:08:40, 3.86it/s] 36%|███▌ | 133136/371472 [10:34:50<17:22:14, 3.81it/s] 36%|███▌ | 133137/371472 [10:34:50<19:47:26, 3.35it/s] 36%|███▌ | 133138/371472 [10:34:50<20:37:08, 3.21it/s] 36%|███▌ | 133139/371472 [10:34:51<19:33:05, 3.39it/s] 36%|███▌ | 133140/371472 [10:34:51<19:39:19, 3.37it/s] {'loss': 3.0454, 'learning_rate': 6.777403089917903e-07, 'epoch': 5.73} + 36%|███▌ | 133140/371472 [10:34:51<19:39:19, 3.37it/s] 36%|███▌ | 133141/371472 [10:34:51<19:03:57, 3.47it/s] 36%|███▌ | 133142/371472 [10:34:52<20:43:33, 3.19it/s] 36%|███▌ | 133143/371472 [10:34:52<19:16:46, 3.43it/s] 36%|███▌ | 133144/371472 [10:34:52<18:19:18, 3.61it/s] 36%|███▌ | 133145/371472 [10:34:52<17:48:21, 3.72it/s] 36%|███▌ | 133146/371472 [10:34:53<18:15:09, 3.63it/s] 36%|███▌ | 133147/371472 [10:34:53<18:15:17, 3.63it/s] 36%|███▌ | 133148/371472 [10:34:53<18:07:56, 3.65it/s] 36%|███▌ | 133149/371472 [10:34:53<17:47:59, 3.72it/s] 36%|███▌ | 133150/371472 [10:34:54<18:00:06, 3.68it/s] 36%|███▌ | 133151/371472 [10:34:54<19:29:45, 3.40it/s] 36%|███▌ | 133152/371472 [10:34:54<19:00:18, 3.48it/s] 36%|███▌ | 133153/371472 [10:34:55<18:09:55, 3.64it/s] 36%|███▌ | 133154/371472 [10:34:55<18:09:46, 3.64it/s] 36%|███▌ | 133155/371472 [10:34:55<18:43:37, 3.53it/s] 36%|███▌ | 133156/371472 [10:34:55<18:17:15, 3.62it/s] 36%|███▌ | 133157/371472 [10:34:56<18:43:42, 3.53it/s] 36%|███▌ | 133158/371472 [10:34:56<18:26:59, 3.59it/s] 36%|███▌ | 133159/371472 [10:34:56<17:44:59, 3.73it/s] 36%|███▌ | 133160/371472 [10:34:56<17:18:07, 3.83it/s] {'loss': 3.2645, 'learning_rate': 6.776918270163114e-07, 'epoch': 5.74} + 36%|███▌ | 133160/371472 [10:34:56<17:18:07, 3.83it/s] 36%|███▌ | 133161/371472 [10:34:57<17:26:15, 3.80it/s] 36%|███▌ | 133162/371472 [10:34:57<19:20:18, 3.42it/s] 36%|███▌ | 133163/371472 [10:34:57<18:48:20, 3.52it/s] 36%|███▌ | 133164/371472 [10:34:58<18:54:27, 3.50it/s] 36%|███▌ | 133165/371472 [10:34:58<18:22:13, 3.60it/s] 36%|███▌ | 133166/371472 [10:34:58<19:15:28, 3.44it/s] 36%|███▌ | 133167/371472 [10:34:58<19:31:09, 3.39it/s] 36%|███▌ | 133168/371472 [10:34:59<18:33:38, 3.57it/s] 36%|███▌ | 133169/371472 [10:34:59<18:39:16, 3.55it/s] 36%|███▌ | 133170/371472 [10:34:59<18:19:54, 3.61it/s] 36%|███▌ | 133171/371472 [10:35:00<18:22:09, 3.60it/s] 36%|███▌ | 133172/371472 [10:35:00<18:44:00, 3.53it/s] 36%|███▌ | 133173/371472 [10:35:00<17:58:36, 3.68it/s] 36%|███▌ | 133174/371472 [10:35:00<18:00:43, 3.67it/s] 36%|███▌ | 133175/371472 [10:35:01<18:28:43, 3.58it/s] 36%|███▌ | 133176/371472 [10:35:01<19:38:52, 3.37it/s] 36%|███▌ | 133177/371472 [10:35:01<19:13:25, 3.44it/s] 36%|███▌ | 133178/371472 [10:35:02<19:54:58, 3.32it/s] 36%|███▌ | 133179/371472 [10:35:02<18:52:25, 3.51it/s] 36%|███▌ | 133180/371472 [10:35:02<18:55:34, 3.50it/s] {'loss': 3.1076, 'learning_rate': 6.776433450408326e-07, 'epoch': 5.74} + 36%|███▌ | 133180/371472 [10:35:02<18:55:34, 3.50it/s] 36%|███▌ | 133181/371472 [10:35:02<18:33:43, 3.57it/s] 36%|███▌ | 133182/371472 [10:35:03<18:08:09, 3.65it/s] 36%|███▌ | 133183/371472 [10:35:03<18:03:59, 3.66it/s] 36%|███▌ | 133184/371472 [10:35:03<18:08:13, 3.65it/s] 36%|███▌ | 133185/371472 [10:35:03<17:57:21, 3.69it/s] 36%|███▌ | 133186/371472 [10:35:04<19:04:51, 3.47it/s] 36%|███▌ | 133187/371472 [10:35:04<19:28:19, 3.40it/s] 36%|███▌ | 133188/371472 [10:35:04<19:04:15, 3.47it/s] 36%|███▌ | 133189/371472 [10:35:05<19:53:12, 3.33it/s] 36%|███▌ | 133190/371472 [10:35:05<19:36:24, 3.38it/s] 36%|███▌ | 133191/371472 [10:35:05<18:45:55, 3.53it/s] 36%|███▌ | 133192/371472 [10:35:06<20:08:40, 3.29it/s] 36%|███▌ | 133193/371472 [10:35:06<20:01:59, 3.30it/s] 36%|███▌ | 133194/371472 [10:35:06<20:15:35, 3.27it/s] 36%|███▌ | 133195/371472 [10:35:06<19:28:00, 3.40it/s] 36%|███▌ | 133196/371472 [10:35:07<18:37:20, 3.55it/s] 36%|███▌ | 133197/371472 [10:35:07<17:57:20, 3.69it/s] 36%|███▌ | 133198/371472 [10:35:07<17:27:33, 3.79it/s] 36%|███▌ | 133199/371472 [10:35:08<17:58:31, 3.68it/s] 36%|███▌ | 133200/371472 [10:35:08<18:48:42, 3.52it/s] {'loss': 3.1345, 'learning_rate': 6.775948630653538e-07, 'epoch': 5.74} + 36%|███▌ | 133200/371472 [10:35:08<18:48:42, 3.52it/s] 36%|███▌ | 133201/371472 [10:35:08<18:43:47, 3.53it/s] 36%|███▌ | 133202/371472 [10:35:08<18:27:14, 3.59it/s] 36%|███▌ | 133203/371472 [10:35:09<18:15:45, 3.62it/s] 36%|███▌ | 133204/371472 [10:35:09<19:02:39, 3.48it/s] 36%|███▌ | 133205/371472 [10:35:09<18:48:53, 3.52it/s] 36%|███▌ | 133206/371472 [10:35:10<19:14:51, 3.44it/s] 36%|███▌ | 133207/371472 [10:35:10<19:12:51, 3.44it/s] 36%|███▌ | 133208/371472 [10:35:10<18:23:06, 3.60it/s] 36%|███▌ | 133209/371472 [10:35:10<19:06:58, 3.46it/s] 36%|███▌ | 133210/371472 [10:35:11<18:49:42, 3.52it/s] 36%|███▌ | 133211/371472 [10:35:11<18:05:05, 3.66it/s] 36%|███▌ | 133212/371472 [10:35:11<17:55:24, 3.69it/s] 36%|███▌ | 133213/371472 [10:35:11<18:15:09, 3.63it/s] 36%|███▌ | 133214/371472 [10:35:12<18:49:41, 3.52it/s] 36%|███▌ | 133215/371472 [10:35:12<19:39:37, 3.37it/s] 36%|███▌ | 133216/371472 [10:35:12<20:25:01, 3.24it/s] 36%|███▌ | 133217/371472 [10:35:13<20:27:30, 3.23it/s] 36%|███▌ | 133218/371472 [10:35:13<19:44:34, 3.35it/s] 36%|███▌ | 133219/371472 [10:35:13<19:16:28, 3.43it/s] 36%|███▌ | 133220/371472 [10:35:14<19:46:21, 3.35it/s] {'loss': 3.1146, 'learning_rate': 6.775463810898747e-07, 'epoch': 5.74} + 36%|███▌ | 133220/371472 [10:35:14<19:46:21, 3.35it/s] 36%|███▌ | 133221/371472 [10:35:14<18:53:41, 3.50it/s] 36%|███▌ | 133222/371472 [10:35:14<19:22:55, 3.41it/s] 36%|███▌ | 133223/371472 [10:35:14<18:33:46, 3.57it/s] 36%|███▌ | 133224/371472 [10:35:15<17:56:29, 3.69it/s] 36%|███▌ | 133225/371472 [10:35:15<18:14:41, 3.63it/s] 36%|███▌ | 133226/371472 [10:35:15<18:01:57, 3.67it/s] 36%|███▌ | 133227/371472 [10:35:16<18:06:02, 3.66it/s] 36%|███▌ | 133228/371472 [10:35:16<19:46:11, 3.35it/s] 36%|███▌ | 133229/371472 [10:35:16<19:06:39, 3.46it/s] 36%|███▌ | 133230/371472 [10:35:16<20:18:04, 3.26it/s] 36%|███▌ | 133231/371472 [10:35:17<19:43:18, 3.36it/s] 36%|███▌ | 133232/371472 [10:35:17<19:02:04, 3.48it/s] 36%|███▌ | 133233/371472 [10:35:17<18:27:12, 3.59it/s] 36%|███▌ | 133234/371472 [10:35:18<18:35:21, 3.56it/s] 36%|███▌ | 133235/371472 [10:35:18<18:30:45, 3.57it/s] 36%|███▌ | 133236/371472 [10:35:18<17:40:57, 3.74it/s] 36%|███▌ | 133237/371472 [10:35:18<17:40:58, 3.74it/s] 36%|███▌ | 133238/371472 [10:35:19<17:14:11, 3.84it/s] 36%|███▌ | 133239/371472 [10:35:19<16:37:06, 3.98it/s] 36%|███▌ | 133240/371472 [10:35:19<16:59:55, 3.89it/s] {'loss': 3.4338, 'learning_rate': 6.774978991143958e-07, 'epoch': 5.74} + 36%|███▌ | 133240/371472 [10:35:19<16:59:55, 3.89it/s] 36%|███▌ | 133241/371472 [10:35:19<17:20:42, 3.82it/s] 36%|███▌ | 133242/371472 [10:35:20<19:27:00, 3.40it/s] 36%|███▌ | 133243/371472 [10:35:20<18:55:00, 3.50it/s] 36%|███▌ | 133244/371472 [10:35:20<18:50:27, 3.51it/s] 36%|███▌ | 133245/371472 [10:35:21<18:52:34, 3.51it/s] 36%|███▌ | 133246/371472 [10:35:21<18:29:59, 3.58it/s] 36%|███▌ | 133247/371472 [10:35:21<18:30:43, 3.57it/s] 36%|███▌ | 133248/371472 [10:35:21<18:03:21, 3.66it/s] 36%|███▌ | 133249/371472 [10:35:22<18:56:04, 3.49it/s] 36%|███▌ | 133250/371472 [10:35:22<18:20:00, 3.61it/s] 36%|███▌ | 133251/371472 [10:35:22<18:03:30, 3.66it/s] 36%|███▌ | 133252/371472 [10:35:22<18:01:58, 3.67it/s] 36%|███▌ | 133253/371472 [10:35:23<18:17:30, 3.62it/s] 36%|███▌ | 133254/371472 [10:35:23<18:17:54, 3.62it/s] 36%|███▌ | 133255/371472 [10:35:23<17:41:36, 3.74it/s] 36%|███▌ | 133256/371472 [10:35:24<17:39:44, 3.75it/s] 36%|███▌ | 133257/371472 [10:35:24<17:20:32, 3.82it/s] 36%|███▌ | 133258/371472 [10:35:24<17:19:08, 3.82it/s] 36%|███▌ | 133259/371472 [10:35:24<16:56:11, 3.91it/s] 36%|███▌ | 133260/371472 [10:35:25<18:03:47, 3.66it/s] {'loss': 3.1648, 'learning_rate': 6.77449417138917e-07, 'epoch': 5.74} + 36%|███▌ | 133260/371472 [10:35:25<18:03:47, 3.66it/s] 36%|███▌ | 133261/371472 [10:35:25<18:08:45, 3.65it/s] 36%|███▌ | 133262/371472 [10:35:25<18:12:53, 3.63it/s] 36%|███▌ | 133263/371472 [10:35:25<18:03:14, 3.67it/s] 36%|███▌ | 133264/371472 [10:35:26<17:33:51, 3.77it/s] 36%|███▌ | 133265/371472 [10:35:26<18:12:22, 3.63it/s] 36%|███▌ | 133266/371472 [10:35:26<18:45:23, 3.53it/s] 36%|███▌ | 133267/371472 [10:35:27<18:42:55, 3.54it/s] 36%|███▌ | 133268/371472 [10:35:27<19:42:27, 3.36it/s] 36%|███▌ | 133269/371472 [10:35:27<19:34:31, 3.38it/s] 36%|███▌ | 133270/371472 [10:35:28<20:20:43, 3.25it/s] 36%|███▌ | 133271/371472 [10:35:28<20:08:13, 3.29it/s] 36%|███▌ | 133272/371472 [10:35:28<19:36:17, 3.37it/s] 36%|███▌ | 133273/371472 [10:35:28<19:44:24, 3.35it/s] 36%|███▌ | 133274/371472 [10:35:29<19:39:10, 3.37it/s] 36%|███▌ | 133275/371472 [10:35:29<19:12:22, 3.45it/s] 36%|███▌ | 133276/371472 [10:35:29<18:21:10, 3.61it/s] 36%|███▌ | 133277/371472 [10:35:29<18:10:12, 3.64it/s] 36%|███▌ | 133278/371472 [10:35:30<20:27:08, 3.24it/s] 36%|███▌ | 133279/371472 [10:35:30<20:17:52, 3.26it/s] 36%|███▌ | 133280/371472 [10:35:30<19:26:54, 3.40it/s] {'loss': 3.1633, 'learning_rate': 6.774009351634382e-07, 'epoch': 5.74} + 36%|███▌ | 133280/371472 [10:35:30<19:26:54, 3.40it/s] 36%|███▌ | 133281/371472 [10:35:31<19:26:18, 3.40it/s] 36%|███▌ | 133282/371472 [10:35:31<18:20:08, 3.61it/s] 36%|███▌ | 133283/371472 [10:35:31<17:55:20, 3.69it/s] 36%|███▌ | 133284/371472 [10:35:32<18:36:23, 3.56it/s] 36%|███▌ | 133285/371472 [10:35:32<19:38:02, 3.37it/s] 36%|███▌ | 133286/371472 [10:35:32<18:55:09, 3.50it/s] 36%|███▌ | 133287/371472 [10:35:32<18:30:28, 3.57it/s] 36%|███▌ | 133288/371472 [10:35:33<17:45:42, 3.72it/s] 36%|███▌ | 133289/371472 [10:35:33<17:30:21, 3.78it/s] 36%|███▌ | 133290/371472 [10:35:33<17:31:08, 3.78it/s] 36%|███▌ | 133291/371472 [10:35:33<18:28:56, 3.58it/s] 36%|███▌ | 133292/371472 [10:35:34<18:50:02, 3.51it/s] 36%|███▌ | 133293/371472 [10:35:34<18:26:10, 3.59it/s] 36%|███▌ | 133294/371472 [10:35:34<19:23:22, 3.41it/s] 36%|███▌ | 133295/371472 [10:35:35<18:32:20, 3.57it/s] 36%|███▌ | 133296/371472 [10:35:35<18:13:09, 3.63it/s] 36%|███▌ | 133297/371472 [10:35:35<17:34:24, 3.76it/s] 36%|███▌ | 133298/371472 [10:35:35<18:23:56, 3.60it/s] 36%|███▌ | 133299/371472 [10:35:36<18:19:55, 3.61it/s] 36%|███▌ | 133300/371472 [10:35:36<18:03:26, 3.66it/s] {'loss': 2.9923, 'learning_rate': 6.773524531879592e-07, 'epoch': 5.74} + 36%|███▌ | 133300/371472 [10:35:36<18:03:26, 3.66it/s] 36%|███▌ | 133301/371472 [10:35:36<18:24:47, 3.59it/s] 36%|███▌ | 133302/371472 [10:35:37<18:11:52, 3.64it/s] 36%|███▌ | 133303/371472 [10:35:37<17:55:17, 3.69it/s] 36%|███▌ | 133304/371472 [10:35:37<18:15:54, 3.62it/s] 36%|███▌ | 133305/371472 [10:35:37<17:51:18, 3.71it/s] 36%|███▌ | 133306/371472 [10:35:38<17:26:46, 3.79it/s] 36%|███▌ | 133307/371472 [10:35:38<17:50:57, 3.71it/s] 36%|███▌ | 133308/371472 [10:35:38<19:00:47, 3.48it/s] 36%|███▌ | 133309/371472 [10:35:38<18:16:36, 3.62it/s] 36%|███▌ | 133310/371472 [10:35:39<17:51:53, 3.70it/s] 36%|███▌ | 133311/371472 [10:35:39<17:35:57, 3.76it/s] 36%|███▌ | 133312/371472 [10:35:39<17:45:02, 3.73it/s] 36%|███▌ | 133313/371472 [10:35:39<17:25:41, 3.80it/s] 36%|███▌ | 133314/371472 [10:35:40<17:39:04, 3.75it/s] 36%|███▌ | 133315/371472 [10:35:40<18:31:41, 3.57it/s] 36%|███▌ | 133316/371472 [10:35:40<17:45:58, 3.72it/s] 36%|███▌ | 133317/371472 [10:35:41<17:19:31, 3.82it/s] 36%|███▌ | 133318/371472 [10:35:41<19:38:05, 3.37it/s] 36%|███▌ | 133319/371472 [10:35:41<18:49:53, 3.51it/s] 36%|███▌ | 133320/371472 [10:35:41<18:00:58, 3.67it/s] {'loss': 3.072, 'learning_rate': 6.773039712124803e-07, 'epoch': 5.74} + 36%|███▌ | 133320/371472 [10:35:41<18:00:58, 3.67it/s] 36%|███▌ | 133321/371472 [10:35:42<17:23:54, 3.80it/s] 36%|███▌ | 133322/371472 [10:35:42<17:47:11, 3.72it/s] 36%|███▌ | 133323/371472 [10:35:42<18:02:11, 3.67it/s] 36%|███▌ | 133324/371472 [10:35:42<17:46:28, 3.72it/s] 36%|███▌ | 133325/371472 [10:35:43<18:04:17, 3.66it/s] 36%|███▌ | 133326/371472 [10:35:43<18:05:33, 3.66it/s] 36%|███▌ | 133327/371472 [10:35:43<18:54:26, 3.50it/s] 36%|███▌ | 133328/371472 [10:35:44<18:50:55, 3.51it/s] 36%|███▌ | 133329/371472 [10:35:44<18:53:17, 3.50it/s] 36%|███▌ | 133330/371472 [10:35:44<18:07:27, 3.65it/s] 36%|███▌ | 133331/371472 [10:35:44<17:39:38, 3.75it/s] 36%|███▌ | 133332/371472 [10:35:45<18:58:07, 3.49it/s] 36%|███▌ | 133333/371472 [10:35:45<18:11:38, 3.64it/s] 36%|███▌ | 133334/371472 [10:35:45<17:59:22, 3.68it/s] 36%|███▌ | 133335/371472 [10:35:46<18:59:19, 3.48it/s] 36%|███▌ | 133336/371472 [10:35:46<19:52:50, 3.33it/s] 36%|███▌ | 133337/371472 [10:35:46<19:18:24, 3.43it/s] 36%|███▌ | 133338/371472 [10:35:46<19:24:27, 3.41it/s] 36%|███▌ | 133339/371472 [10:35:47<20:49:20, 3.18it/s] 36%|███▌ | 133340/371472 [10:35:47<19:21:56, 3.42it/s] {'loss': 3.0225, 'learning_rate': 6.772554892370015e-07, 'epoch': 5.74} + 36%|███▌ | 133340/371472 [10:35:47<19:21:56, 3.42it/s] 36%|███▌ | 133341/371472 [10:35:47<18:29:41, 3.58it/s] 36%|███▌ | 133342/371472 [10:35:48<17:59:57, 3.68it/s] 36%|███▌ | 133343/371472 [10:35:48<18:29:51, 3.58it/s] 36%|███▌ | 133344/371472 [10:35:48<18:37:57, 3.55it/s] 36%|███▌ | 133345/371472 [10:35:49<19:14:26, 3.44it/s] 36%|███▌ | 133346/371472 [10:35:49<18:39:57, 3.54it/s] 36%|███▌ | 133347/371472 [10:35:49<17:49:31, 3.71it/s] 36%|███▌ | 133348/371472 [10:35:49<18:59:27, 3.48it/s] 36%|███▌ | 133349/371472 [10:35:50<18:38:58, 3.55it/s] 36%|███▌ | 133350/371472 [10:35:50<18:30:44, 3.57it/s] 36%|███▌ | 133351/371472 [10:35:50<17:40:37, 3.74it/s] 36%|███▌ | 133352/371472 [10:35:50<17:52:51, 3.70it/s] 36%|███▌ | 133353/371472 [10:35:51<17:15:06, 3.83it/s] 36%|███▌ | 133354/371472 [10:35:51<16:47:56, 3.94it/s] 36%|███▌ | 133355/371472 [10:35:51<17:44:54, 3.73it/s] 36%|███▌ | 133356/371472 [10:35:51<18:12:21, 3.63it/s] 36%|███▌ | 133357/371472 [10:35:52<19:05:12, 3.47it/s] 36%|███▌ | 133358/371472 [10:35:52<18:54:42, 3.50it/s] 36%|███▌ | 133359/371472 [10:35:52<18:37:39, 3.55it/s] 36%|███▌ | 133360/371472 [10:35:53<20:37:56, 3.21it/s] {'loss': 3.1442, 'learning_rate': 6.772070072615225e-07, 'epoch': 5.74} + 36%|███▌ | 133360/371472 [10:35:53<20:37:56, 3.21it/s] 36%|███▌ | 133361/371472 [10:35:53<22:01:24, 3.00it/s] 36%|███▌ | 133362/371472 [10:35:53<20:21:49, 3.25it/s] 36%|███▌ | 133363/371472 [10:35:54<19:26:48, 3.40it/s] 36%|███▌ | 133364/371472 [10:35:54<20:07:08, 3.29it/s] 36%|███▌ | 133365/371472 [10:35:54<19:25:59, 3.40it/s] 36%|███▌ | 133366/371472 [10:35:54<18:47:21, 3.52it/s] 36%|███▌ | 133367/371472 [10:35:55<19:01:56, 3.48it/s] 36%|███▌ | 133368/371472 [10:35:55<19:07:02, 3.46it/s] 36%|███▌ | 133369/371472 [10:35:55<18:34:55, 3.56it/s] 36%|███▌ | 133370/371472 [10:35:56<18:06:51, 3.65it/s] 36%|███▌ | 133371/371472 [10:35:56<17:26:12, 3.79it/s] 36%|███▌ | 133372/371472 [10:35:56<17:25:17, 3.80it/s] 36%|███▌ | 133373/371472 [10:35:56<17:43:33, 3.73it/s] 36%|███▌ | 133374/371472 [10:35:57<20:13:23, 3.27it/s] 36%|███▌ | 133375/371472 [10:35:57<19:45:46, 3.35it/s] 36%|███▌ | 133376/371472 [10:35:57<19:04:43, 3.47it/s] 36%|███▌ | 133377/371472 [10:35:58<19:01:19, 3.48it/s] 36%|███▌ | 133378/371472 [10:35:58<18:07:18, 3.65it/s] 36%|███▌ | 133379/371472 [10:35:58<18:09:34, 3.64it/s] 36%|███▌ | 133380/371472 [10:35:58<18:57:22, 3.49it/s] {'loss': 3.1921, 'learning_rate': 6.771585252860437e-07, 'epoch': 5.74} + 36%|███▌ | 133380/371472 [10:35:58<18:57:22, 3.49it/s] 36%|███▌ | 133381/371472 [10:35:59<19:21:10, 3.42it/s] 36%|███▌ | 133382/371472 [10:35:59<19:22:02, 3.41it/s] 36%|███▌ | 133383/371472 [10:35:59<18:27:30, 3.58it/s] 36%|███▌ | 133384/371472 [10:36:00<17:53:17, 3.70it/s] 36%|███▌ | 133385/371472 [10:36:00<17:38:27, 3.75it/s] 36%|███▌ | 133386/371472 [10:36:00<16:52:58, 3.92it/s] 36%|███▌ | 133387/371472 [10:36:00<17:02:39, 3.88it/s] 36%|███▌ | 133388/371472 [10:36:00<16:33:48, 3.99it/s] 36%|███▌ | 133389/371472 [10:36:01<16:45:37, 3.95it/s] 36%|███▌ | 133390/371472 [10:36:01<17:38:19, 3.75it/s] 36%|███▌ | 133391/371472 [10:36:01<18:55:25, 3.49it/s] 36%|███▌ | 133392/371472 [10:36:02<18:16:58, 3.62it/s] 36%|███▌ | 133393/371472 [10:36:02<18:45:51, 3.52it/s] 36%|███▌ | 133394/371472 [10:36:02<18:03:21, 3.66it/s] 36%|███▌ | 133395/371472 [10:36:02<18:22:41, 3.60it/s] 36%|███▌ | 133396/371472 [10:36:03<18:05:44, 3.65it/s] 36%|███▌ | 133397/371472 [10:36:03<19:37:19, 3.37it/s] 36%|███▌ | 133398/371472 [10:36:03<18:50:25, 3.51it/s] 36%|███▌ | 133399/371472 [10:36:04<18:23:04, 3.60it/s] 36%|███▌ | 133400/371472 [10:36:04<17:38:47, 3.75it/s] {'loss': 3.2415, 'learning_rate': 6.771100433105647e-07, 'epoch': 5.75} + 36%|███▌ | 133400/371472 [10:36:04<17:38:47, 3.75it/s] 36%|███▌ | 133401/371472 [10:36:04<18:22:53, 3.60it/s] 36%|███▌ | 133402/371472 [10:36:04<18:36:53, 3.55it/s] 36%|███▌ | 133403/371472 [10:36:05<18:30:07, 3.57it/s] 36%|███▌ | 133404/371472 [10:36:05<18:31:28, 3.57it/s] 36%|███▌ | 133405/371472 [10:36:05<17:55:28, 3.69it/s] 36%|███▌ | 133406/371472 [10:36:06<18:10:54, 3.64it/s] 36%|███▌ | 133407/371472 [10:36:06<17:30:59, 3.78it/s] 36%|███▌ | 133408/371472 [10:36:06<17:39:15, 3.75it/s] 36%|███▌ | 133409/371472 [10:36:06<17:59:49, 3.67it/s] 36%|███▌ | 133410/371472 [10:36:07<17:45:32, 3.72it/s] 36%|███▌ | 133411/371472 [10:36:07<18:17:23, 3.62it/s] 36%|███▌ | 133412/371472 [10:36:07<19:17:45, 3.43it/s] 36%|███▌ | 133413/371472 [10:36:07<18:47:08, 3.52it/s] 36%|███▌ | 133414/371472 [10:36:08<18:02:23, 3.67it/s] 36%|███▌ | 133415/371472 [10:36:08<18:09:57, 3.64it/s] 36%|███▌ | 133416/371472 [10:36:08<18:37:27, 3.55it/s] 36%|███▌ | 133417/371472 [10:36:09<19:28:54, 3.39it/s] 36%|███▌ | 133418/371472 [10:36:09<19:31:47, 3.39it/s] 36%|███▌ | 133419/371472 [10:36:09<18:20:38, 3.60it/s] 36%|███▌ | 133420/371472 [10:36:09<19:14:05, 3.44it/s] {'loss': 3.2781, 'learning_rate': 6.770615613350859e-07, 'epoch': 5.75} + 36%|███▌ | 133420/371472 [10:36:09<19:14:05, 3.44it/s] 36%|███▌ | 133421/371472 [10:36:10<19:04:26, 3.47it/s] 36%|███▌ | 133422/371472 [10:36:10<18:48:08, 3.52it/s] 36%|███▌ | 133423/371472 [10:36:10<18:29:24, 3.58it/s] 36%|███▌ | 133424/371472 [10:36:11<17:48:30, 3.71it/s] 36%|███▌ | 133425/371472 [10:36:11<17:10:16, 3.85it/s] 36%|███▌ | 133426/371472 [10:36:11<17:10:33, 3.85it/s] 36%|███▌ | 133427/371472 [10:36:11<16:50:54, 3.92it/s] 36%|███▌ | 133428/371472 [10:36:12<17:32:23, 3.77it/s] 36%|███▌ | 133429/371472 [10:36:12<17:44:05, 3.73it/s] 36%|███▌ | 133430/371472 [10:36:12<18:30:12, 3.57it/s] 36%|███▌ | 133431/371472 [10:36:13<19:29:44, 3.39it/s] 36%|███▌ | 133432/371472 [10:36:13<19:03:52, 3.47it/s] 36%|███▌ | 133433/371472 [10:36:13<19:00:43, 3.48it/s] 36%|███▌ | 133434/371472 [10:36:13<18:40:18, 3.54it/s] 36%|███▌ | 133435/371472 [10:36:14<19:27:35, 3.40it/s] 36%|███▌ | 133436/371472 [10:36:14<19:42:02, 3.36it/s] 36%|███▌ | 133437/371472 [10:36:14<19:02:35, 3.47it/s] 36%|███▌ | 133438/371472 [10:36:15<19:36:51, 3.37it/s] 36%|███▌ | 133439/371472 [10:36:15<19:15:54, 3.43it/s] 36%|███▌ | 133440/371472 [10:36:15<18:54:31, 3.50it/s] {'loss': 3.1377, 'learning_rate': 6.77013079359607e-07, 'epoch': 5.75} + 36%|███▌ | 133440/371472 [10:36:15<18:54:31, 3.50it/s] 36%|███▌ | 133441/371472 [10:36:15<19:31:56, 3.39it/s] 36%|███▌ | 133442/371472 [10:36:16<20:43:33, 3.19it/s] 36%|███▌ | 133443/371472 [10:36:16<19:35:38, 3.37it/s] 36%|███▌ | 133444/371472 [10:36:16<18:54:43, 3.50it/s] 36%|███▌ | 133445/371472 [10:36:17<18:15:57, 3.62it/s] 36%|███▌ | 133446/371472 [10:36:17<18:16:26, 3.62it/s] 36%|███▌ | 133447/371472 [10:36:17<18:02:42, 3.66it/s] 36%|███▌ | 133448/371472 [10:36:17<17:50:58, 3.70it/s] 36%|███▌ | 133449/371472 [10:36:18<17:57:26, 3.68it/s] 36%|███▌ | 133450/371472 [10:36:18<18:36:46, 3.55it/s] 36%|███▌ | 133451/371472 [10:36:18<18:47:37, 3.52it/s] 36%|███▌ | 133452/371472 [10:36:18<18:42:29, 3.53it/s] 36%|███▌ | 133453/371472 [10:36:19<17:50:29, 3.71it/s] 36%|███▌ | 133454/371472 [10:36:19<17:25:34, 3.79it/s] 36%|███▌ | 133455/371472 [10:36:19<17:59:08, 3.68it/s] 36%|███▌ | 133456/371472 [10:36:20<18:25:33, 3.59it/s] 36%|███▌ | 133457/371472 [10:36:20<20:33:03, 3.22it/s] 36%|███▌ | 133458/371472 [10:36:20<19:25:04, 3.40it/s] 36%|███▌ | 133459/371472 [10:36:21<19:36:45, 3.37it/s] 36%|███▌ | 133460/371472 [10:36:21<20:22:20, 3.25it/s] {'loss': 3.009, 'learning_rate': 6.769645973841281e-07, 'epoch': 5.75} + 36%|███▌ | 133460/371472 [10:36:21<20:22:20, 3.25it/s] 36%|███▌ | 133461/371472 [10:36:21<19:16:28, 3.43it/s] 36%|███▌ | 133462/371472 [10:36:21<18:29:19, 3.58it/s] 36%|███▌ | 133463/371472 [10:36:22<17:45:25, 3.72it/s] 36%|███▌ | 133464/371472 [10:36:22<17:45:07, 3.72it/s] 36%|███▌ | 133465/371472 [10:36:22<18:07:23, 3.65it/s] 36%|███▌ | 133466/371472 [10:36:22<19:08:50, 3.45it/s] 36%|███▌ | 133467/371472 [10:36:23<19:11:36, 3.44it/s] 36%|███▌ | 133468/371472 [10:36:23<19:54:21, 3.32it/s] 36%|███▌ | 133469/371472 [10:36:23<19:38:10, 3.37it/s] 36%|███▌ | 133470/371472 [10:36:24<19:44:50, 3.35it/s] 36%|███▌ | 133471/371472 [10:36:24<18:59:29, 3.48it/s] 36%|███▌ | 133472/371472 [10:36:24<19:45:50, 3.35it/s] 36%|███▌ | 133473/371472 [10:36:25<20:19:21, 3.25it/s] 36%|███▌ | 133474/371472 [10:36:25<19:28:08, 3.40it/s] 36%|███▌ | 133475/371472 [10:36:25<18:34:45, 3.56it/s] 36%|███▌ | 133476/371472 [10:36:25<18:22:44, 3.60it/s] 36%|███▌ | 133477/371472 [10:36:26<18:01:55, 3.67it/s] 36%|███▌ | 133478/371472 [10:36:26<17:57:55, 3.68it/s] 36%|███▌ | 133479/371472 [10:36:26<18:01:05, 3.67it/s] 36%|███▌ | 133480/371472 [10:36:26<17:11:54, 3.84it/s] {'loss': 2.9462, 'learning_rate': 6.769161154086491e-07, 'epoch': 5.75} + 36%|███▌ | 133480/371472 [10:36:26<17:11:54, 3.84it/s] 36%|███▌ | 133481/371472 [10:36:27<19:26:26, 3.40it/s] 36%|███▌ | 133482/371472 [10:36:27<18:58:16, 3.48it/s] 36%|███▌ | 133483/371472 [10:36:27<18:02:57, 3.66it/s] 36%|███▌ | 133484/371472 [10:36:28<18:19:20, 3.61it/s] 36%|███▌ | 133485/371472 [10:36:28<18:49:40, 3.51it/s] 36%|███▌ | 133486/371472 [10:36:28<18:44:31, 3.53it/s] 36%|███▌ | 133487/371472 [10:36:28<19:20:30, 3.42it/s] 36%|███▌ | 133488/371472 [10:36:29<20:11:03, 3.28it/s] 36%|███▌ | 133489/371472 [10:36:29<19:55:46, 3.32it/s] 36%|███▌ | 133490/371472 [10:36:29<18:50:22, 3.51it/s] 36%|███▌ | 133491/371472 [10:36:30<19:35:57, 3.37it/s] 36%|███▌ | 133492/371472 [10:36:30<18:39:25, 3.54it/s] 36%|███▌ | 133493/371472 [10:36:30<17:51:39, 3.70it/s] 36%|███▌ | 133494/371472 [10:36:30<18:50:26, 3.51it/s] 36%|███▌ | 133495/371472 [10:36:31<18:08:08, 3.65it/s] 36%|███▌ | 133496/371472 [10:36:31<17:47:28, 3.72it/s] 36%|███▌ | 133497/371472 [10:36:31<20:42:34, 3.19it/s] 36%|███▌ | 133498/371472 [10:36:32<19:02:31, 3.47it/s] 36%|███▌ | 133499/371472 [10:36:32<18:47:26, 3.52it/s] 36%|███▌ | 133500/371472 [10:36:32<18:22:59, 3.60it/s] {'loss': 3.1098, 'learning_rate': 6.768676334331703e-07, 'epoch': 5.75} + 36%|███▌ | 133500/371472 [10:36:32<18:22:59, 3.60it/s] 36%|███▌ | 133501/371472 [10:36:32<18:38:06, 3.55it/s] 36%|███▌ | 133502/371472 [10:36:33<18:56:16, 3.49it/s] 36%|███▌ | 133503/371472 [10:36:33<18:24:30, 3.59it/s] 36%|███▌ | 133504/371472 [10:36:33<18:19:40, 3.61it/s] 36%|███▌ | 133505/371472 [10:36:34<18:00:03, 3.67it/s] 36%|███▌ | 133506/371472 [10:36:34<17:29:30, 3.78it/s] 36%|███▌ | 133507/371472 [10:36:34<18:28:53, 3.58it/s] 36%|███▌ | 133508/371472 [10:36:34<17:44:49, 3.72it/s] 36%|███▌ | 133509/371472 [10:36:35<17:12:39, 3.84it/s] 36%|███▌ | 133510/371472 [10:36:35<17:16:39, 3.83it/s] 36%|███▌ | 133511/371472 [10:36:35<16:56:11, 3.90it/s] 36%|███▌ | 133512/371472 [10:36:35<17:29:23, 3.78it/s] 36%|███▌ | 133513/371472 [10:36:36<17:06:53, 3.86it/s] 36%|███▌ | 133514/371472 [10:36:36<19:36:46, 3.37it/s] 36%|███▌ | 133515/371472 [10:36:36<19:18:41, 3.42it/s] 36%|███▌ | 133516/371472 [10:36:37<18:53:34, 3.50it/s] 36%|███▌ | 133517/371472 [10:36:37<18:21:40, 3.60it/s] 36%|███▌ | 133518/371472 [10:36:37<18:02:26, 3.66it/s] 36%|███▌ | 133519/371472 [10:36:37<17:35:34, 3.76it/s] 36%|███▌ | 133520/371472 [10:36:38<16:56:49, 3.90it/s] {'loss': 3.2555, 'learning_rate': 6.768191514576914e-07, 'epoch': 5.75} + 36%|███▌ | 133520/371472 [10:36:38<16:56:49, 3.90it/s] 36%|███▌ | 133521/371472 [10:36:38<16:46:54, 3.94it/s] 36%|███▌ | 133522/371472 [10:36:38<18:54:09, 3.50it/s] 36%|███▌ | 133523/371472 [10:36:39<19:14:37, 3.43it/s] 36%|███▌ | 133524/371472 [10:36:39<19:39:16, 3.36it/s] 36%|███▌ | 133525/371472 [10:36:39<18:44:22, 3.53it/s] 36%|███▌ | 133526/371472 [10:36:39<18:17:14, 3.61it/s] 36%|███▌ | 133527/371472 [10:36:40<17:41:03, 3.74it/s] 36%|███▌ | 133528/371472 [10:36:40<18:49:51, 3.51it/s] 36%|███▌ | 133529/371472 [10:36:40<18:10:36, 3.64it/s] 36%|███▌ | 133530/371472 [10:36:40<18:05:16, 3.65it/s] 36%|███▌ | 133531/371472 [10:36:41<18:40:27, 3.54it/s] 36%|███▌ | 133532/371472 [10:36:41<18:01:23, 3.67it/s] 36%|███▌ | 133533/371472 [10:36:41<18:50:00, 3.51it/s] 36%|███▌ | 133534/371472 [10:36:42<18:13:17, 3.63it/s] 36%|███▌ | 133535/371472 [10:36:42<18:56:16, 3.49it/s] 36%|███▌ | 133536/371472 [10:36:42<18:16:01, 3.62it/s] 36%|███▌ | 133537/371472 [10:36:42<17:38:41, 3.75it/s] 36%|███▌ | 133538/371472 [10:36:43<18:53:32, 3.50it/s] 36%|███▌ | 133539/371472 [10:36:43<18:19:01, 3.61it/s] 36%|███▌ | 133540/371472 [10:36:43<18:44:35, 3.53it/s] {'loss': 3.0656, 'learning_rate': 6.767706694822124e-07, 'epoch': 5.75} + 36%|███▌ | 133540/371472 [10:36:43<18:44:35, 3.53it/s] 36%|███▌ | 133541/371472 [10:36:44<18:24:13, 3.59it/s] 36%|███▌ | 133542/371472 [10:36:44<17:58:46, 3.68it/s] 36%|███▌ | 133543/371472 [10:36:44<18:07:07, 3.65it/s] 36%|███▌ | 133544/371472 [10:36:44<18:33:34, 3.56it/s] 36%|███▌ | 133545/371472 [10:36:45<18:03:48, 3.66it/s] 36%|███▌ | 133546/371472 [10:36:45<18:39:15, 3.54it/s] 36%|███▌ | 133547/371472 [10:36:45<19:02:43, 3.47it/s] 36%|███▌ | 133548/371472 [10:36:45<18:47:39, 3.52it/s] 36%|███▌ | 133549/371472 [10:36:46<18:50:19, 3.51it/s] 36%|███▌ | 133550/371472 [10:36:46<18:20:18, 3.60it/s] 36%|███▌ | 133551/371472 [10:36:46<17:49:06, 3.71it/s] 36%|███▌ | 133552/371472 [10:36:47<17:26:19, 3.79it/s] 36%|███▌ | 133553/371472 [10:36:47<18:09:36, 3.64it/s] 36%|███▌ | 133554/371472 [10:36:47<18:06:45, 3.65it/s] 36%|███▌ | 133555/371472 [10:36:47<18:10:29, 3.64it/s] 36%|███▌ | 133556/371472 [10:36:48<17:31:24, 3.77it/s] 36%|███▌ | 133557/371472 [10:36:48<18:04:05, 3.66it/s] 36%|███▌ | 133558/371472 [10:36:48<18:28:21, 3.58it/s] 36%|███▌ | 133559/371472 [10:36:48<17:44:57, 3.72it/s] 36%|███▌ | 133560/371472 [10:36:49<17:17:55, 3.82it/s] {'loss': 3.0742, 'learning_rate': 6.767221875067336e-07, 'epoch': 5.75} + 36%|███▌ | 133560/371472 [10:36:49<17:17:55, 3.82it/s] 36%|███▌ | 133561/371472 [10:36:49<17:05:03, 3.87it/s] 36%|███▌ | 133562/371472 [10:36:49<17:55:04, 3.69it/s] 36%|███▌ | 133563/371472 [10:36:50<18:16:51, 3.62it/s] 36%|███▌ | 133564/371472 [10:36:50<17:45:06, 3.72it/s] 36%|███▌ | 133565/371472 [10:36:50<17:32:47, 3.77it/s] 36%|███▌ | 133566/371472 [10:36:50<16:47:24, 3.94it/s] 36%|███▌ | 133567/371472 [10:36:51<17:07:35, 3.86it/s] 36%|███▌ | 133568/371472 [10:36:51<16:55:49, 3.90it/s] 36%|███▌ | 133569/371472 [10:36:51<17:10:52, 3.85it/s] 36%|███▌ | 133570/371472 [10:36:51<16:51:19, 3.92it/s] 36%|███▌ | 133571/371472 [10:36:52<17:35:30, 3.76it/s] 36%|███▌ | 133572/371472 [10:36:52<18:49:15, 3.51it/s] 36%|███▌ | 133573/371472 [10:36:52<19:28:13, 3.39it/s] 36%|███▌ | 133574/371472 [10:36:52<18:37:03, 3.55it/s] 36%|███▌ | 133575/371472 [10:36:53<20:33:53, 3.21it/s] 36%|███▌ | 133576/371472 [10:36:53<20:49:22, 3.17it/s] 36%|███▌ | 133577/371472 [10:36:53<19:20:33, 3.42it/s] 36%|███▌ | 133578/371472 [10:36:54<18:44:08, 3.53it/s] 36%|███▌ | 133579/371472 [10:36:54<18:06:20, 3.65it/s] 36%|███▌ | 133580/371472 [10:36:54<17:46:36, 3.72it/s] {'loss': 3.1825, 'learning_rate': 6.766737055312548e-07, 'epoch': 5.75} + 36%|███▌ | 133580/371472 [10:36:54<17:46:36, 3.72it/s] 36%|███▌ | 133581/371472 [10:36:55<18:21:21, 3.60it/s] 36%|███▌ | 133582/371472 [10:36:55<18:25:30, 3.59it/s] 36%|███▌ | 133583/371472 [10:36:55<18:32:52, 3.56it/s] 36%|███▌ | 133584/371472 [10:36:55<18:14:44, 3.62it/s] 36%|███▌ | 133585/371472 [10:36:56<17:50:05, 3.71it/s] 36%|███▌ | 133586/371472 [10:36:56<17:40:26, 3.74it/s] 36%|███▌ | 133587/371472 [10:36:56<19:17:23, 3.43it/s] 36%|███▌ | 133588/371472 [10:36:56<18:23:00, 3.59it/s] 36%|███▌ | 133589/371472 [10:36:57<17:46:24, 3.72it/s] 36%|███▌ | 133590/371472 [10:36:57<18:12:58, 3.63it/s] 36%|███▌ | 133591/371472 [10:36:57<19:06:30, 3.46it/s] 36%|███▌ | 133592/371472 [10:36:58<19:51:37, 3.33it/s] 36%|███▌ | 133593/371472 [10:36:58<19:47:10, 3.34it/s] 36%|███▌ | 133594/371472 [10:36:58<19:27:18, 3.40it/s] 36%|███▌ | 133595/371472 [10:36:58<18:46:28, 3.52it/s] 36%|███▌ | 133596/371472 [10:36:59<18:00:06, 3.67it/s] 36%|███▌ | 133597/371472 [10:36:59<18:20:49, 3.60it/s] 36%|███▌ | 133598/371472 [10:36:59<18:07:51, 3.64it/s] 36%|███▌ | 133599/371472 [10:37:00<18:46:04, 3.52it/s] 36%|███▌ | 133600/371472 [10:37:00<18:53:42, 3.50it/s] {'loss': 3.201, 'learning_rate': 6.766252235557757e-07, 'epoch': 5.75} + 36%|███▌ | 133600/371472 [10:37:00<18:53:42, 3.50it/s] 36%|███▌ | 133601/371472 [10:37:00<18:35:16, 3.55it/s] 36%|███▌ | 133602/371472 [10:37:00<17:56:15, 3.68it/s] 36%|███▌ | 133603/371472 [10:37:01<18:03:12, 3.66it/s] 36%|███▌ | 133604/371472 [10:37:01<17:50:15, 3.70it/s] 36%|███▌ | 133605/371472 [10:37:01<17:57:19, 3.68it/s] 36%|███▌ | 133606/371472 [10:37:01<18:10:42, 3.63it/s] 36%|███▌ | 133607/371472 [10:37:02<18:24:42, 3.59it/s] 36%|███▌ | 133608/371472 [10:37:02<18:14:52, 3.62it/s] 36%|███▌ | 133609/371472 [10:37:02<17:45:39, 3.72it/s] 36%|███▌ | 133610/371472 [10:37:03<17:55:19, 3.69it/s] 36%|███▌ | 133611/371472 [10:37:03<18:04:11, 3.66it/s] 36%|███▌ | 133612/371472 [10:37:03<18:00:12, 3.67it/s] 36%|███▌ | 133613/371472 [10:37:03<18:30:40, 3.57it/s] 36%|███▌ | 133614/371472 [10:37:04<17:48:13, 3.71it/s] 36%|███▌ | 133615/371472 [10:37:04<17:34:34, 3.76it/s] 36%|███▌ | 133616/371472 [10:37:04<17:45:18, 3.72it/s] 36%|███▌ | 133617/371472 [10:37:04<17:29:09, 3.78it/s] 36%|███▌ | 133618/371472 [10:37:05<18:45:20, 3.52it/s] 36%|███▌ | 133619/371472 [10:37:05<18:13:35, 3.62it/s] 36%|███▌ | 133620/371472 [10:37:05<19:01:44, 3.47it/s] {'loss': 3.3179, 'learning_rate': 6.765767415802968e-07, 'epoch': 5.76} + 36%|███▌ | 133620/371472 [10:37:05<19:01:44, 3.47it/s] 36%|███▌ | 133621/371472 [10:37:06<19:01:59, 3.47it/s] 36%|███▌ | 133622/371472 [10:37:06<18:30:09, 3.57it/s] 36%|███▌ | 133623/371472 [10:37:06<18:53:52, 3.50it/s] 36%|███▌ | 133624/371472 [10:37:06<18:17:20, 3.61it/s] 36%|███▌ | 133625/371472 [10:37:07<18:56:53, 3.49it/s] 36%|███▌ | 133626/371472 [10:37:07<19:28:24, 3.39it/s] 36%|███▌ | 133627/371472 [10:37:07<19:41:48, 3.35it/s] 36%|███▌ | 133628/371472 [10:37:08<20:34:32, 3.21it/s] 36%|███▌ | 133629/371472 [10:37:08<20:03:09, 3.29it/s] 36%|███▌ | 133630/371472 [10:37:08<19:49:24, 3.33it/s] 36%|███▌ | 133631/371472 [10:37:09<18:46:19, 3.52it/s] 36%|███▌ | 133632/371472 [10:37:09<19:16:12, 3.43it/s] 36%|███▌ | 133633/371472 [10:37:09<18:40:25, 3.54it/s] 36%|███▌ | 133634/371472 [10:37:09<17:59:01, 3.67it/s] 36%|███▌ | 133635/371472 [10:37:10<18:52:11, 3.50it/s] 36%|███▌ | 133636/371472 [10:37:10<18:20:39, 3.60it/s] 36%|███▌ | 133637/371472 [10:37:10<17:40:09, 3.74it/s] 36%|███▌ | 133638/371472 [10:37:10<17:46:00, 3.72it/s] 36%|███▌ | 133639/371472 [10:37:11<17:14:34, 3.83it/s] 36%|███▌ | 133640/371472 [10:37:11<16:59:44, 3.89it/s] {'loss': 3.1431, 'learning_rate': 6.76528259604818e-07, 'epoch': 5.76} + 36%|███▌ | 133640/371472 [10:37:11<16:59:44, 3.89it/s] 36%|███▌ | 133641/371472 [10:37:11<18:10:46, 3.63it/s] 36%|███▌ | 133642/371472 [10:37:12<17:47:49, 3.71it/s] 36%|███▌ | 133643/371472 [10:37:12<19:13:09, 3.44it/s] 36%|███▌ | 133644/371472 [10:37:12<18:36:47, 3.55it/s] 36%|███▌ | 133645/371472 [10:37:12<18:41:13, 3.54it/s] 36%|███▌ | 133646/371472 [10:37:13<19:06:12, 3.46it/s] 36%|███▌ | 133647/371472 [10:37:13<18:45:14, 3.52it/s] 36%|███▌ | 133648/371472 [10:37:13<19:21:28, 3.41it/s] 36%|███▌ | 133649/371472 [10:37:14<18:21:12, 3.60it/s] 36%|███▌ | 133650/371472 [10:37:14<18:06:04, 3.65it/s] 36%|███▌ | 133651/371472 [10:37:14<17:37:48, 3.75it/s] 36%|███▌ | 133652/371472 [10:37:14<17:22:21, 3.80it/s] 36%|███▌ | 133653/371472 [10:37:15<18:46:18, 3.52it/s] 36%|███▌ | 133654/371472 [10:37:15<18:46:02, 3.52it/s] 36%|███▌ | 133655/371472 [10:37:15<20:58:30, 3.15it/s] 36%|███▌ | 133656/371472 [10:37:16<19:32:54, 3.38it/s] 36%|███▌ | 133657/371472 [10:37:16<18:28:41, 3.58it/s] 36%|███▌ | 133658/371472 [10:37:16<17:54:34, 3.69it/s] 36%|███▌ | 133659/371472 [10:37:16<18:41:13, 3.54it/s] 36%|███▌ | 133660/371472 [10:37:17<18:12:20, 3.63it/s] {'loss': 3.28, 'learning_rate': 6.764797776293391e-07, 'epoch': 5.76} + 36%|███▌ | 133660/371472 [10:37:17<18:12:20, 3.63it/s] 36%|███▌ | 133661/371472 [10:37:17<17:23:42, 3.80it/s] 36%|███▌ | 133662/371472 [10:37:17<17:33:07, 3.76it/s] 36%|███▌ | 133663/371472 [10:37:17<17:21:53, 3.80it/s] 36%|███▌ | 133664/371472 [10:37:18<16:56:19, 3.90it/s] 36%|███▌ | 133665/371472 [10:37:18<18:09:38, 3.64it/s] 36%|███▌ | 133666/371472 [10:37:18<17:44:43, 3.72it/s] 36%|███▌ | 133667/371472 [10:37:18<17:35:08, 3.76it/s] 36%|███▌ | 133668/371472 [10:37:19<17:32:51, 3.76it/s] 36%|███▌ | 133669/371472 [10:37:19<17:29:48, 3.78it/s] 36%|███▌ | 133670/371472 [10:37:19<18:48:10, 3.51it/s] 36%|███▌ | 133671/371472 [10:37:20<18:54:03, 3.49it/s] 36%|███▌ | 133672/371472 [10:37:20<19:14:44, 3.43it/s] 36%|███▌ | 133673/371472 [10:37:20<18:44:44, 3.52it/s] 36%|███▌ | 133674/371472 [10:37:20<18:20:11, 3.60it/s] 36%|███▌ | 133675/371472 [10:37:21<18:17:54, 3.61it/s] 36%|███▌ | 133676/371472 [10:37:21<19:32:15, 3.38it/s] 36%|███▌ | 133677/371472 [10:37:21<19:09:24, 3.45it/s] 36%|███▌ | 133678/371472 [10:37:22<19:36:03, 3.37it/s] 36%|███▌ | 133679/371472 [10:37:22<18:53:00, 3.50it/s] 36%|███▌ | 133680/371472 [10:37:22<18:15:07, 3.62it/s] {'loss': 3.0673, 'learning_rate': 6.764312956538602e-07, 'epoch': 5.76} + 36%|███▌ | 133680/371472 [10:37:22<18:15:07, 3.62it/s] 36%|███▌ | 133681/371472 [10:37:22<17:51:10, 3.70it/s] 36%|███▌ | 133682/371472 [10:37:23<17:23:35, 3.80it/s] 36%|███▌ | 133683/371472 [10:37:23<17:16:53, 3.82it/s] 36%|███▌ | 133684/371472 [10:37:23<17:51:38, 3.70it/s] 36%|███▌ | 133685/371472 [10:37:24<18:34:07, 3.56it/s] 36%|███▌ | 133686/371472 [10:37:24<20:38:33, 3.20it/s] 36%|███▌ | 133687/371472 [10:37:24<20:00:48, 3.30it/s] 36%|███▌ | 133688/371472 [10:37:24<18:47:36, 3.51it/s] 36%|███▌ | 133689/371472 [10:37:25<19:18:51, 3.42it/s] 36%|███▌ | 133690/371472 [10:37:25<19:07:45, 3.45it/s] 36%|███▌ | 133691/371472 [10:37:25<17:56:32, 3.68it/s] 36%|███▌ | 133692/371472 [10:37:26<18:58:48, 3.48it/s] 36%|███▌ | 133693/371472 [10:37:26<18:51:51, 3.50it/s] 36%|███▌ | 133694/371472 [10:37:26<18:12:26, 3.63it/s] 36%|███▌ | 133695/371472 [10:37:26<18:39:32, 3.54it/s] 36%|███▌ | 133696/371472 [10:37:27<18:55:16, 3.49it/s] 36%|███▌ | 133697/371472 [10:37:27<19:11:06, 3.44it/s] 36%|███▌ | 133698/371472 [10:37:27<19:13:59, 3.43it/s] 36%|███▌ | 133699/371472 [10:37:28<20:24:52, 3.24it/s] 36%|███▌ | 133700/371472 [10:37:28<22:48:09, 2.90it/s] {'loss': 2.9866, 'learning_rate': 6.763828136783813e-07, 'epoch': 5.76} + 36%|███▌ | 133700/371472 [10:37:28<22:48:09, 2.90it/s] 36%|███▌ | 133701/371472 [10:37:28<22:10:57, 2.98it/s] 36%|███▌ | 133702/371472 [10:37:29<21:00:46, 3.14it/s] 36%|███▌ | 133703/371472 [10:37:29<19:46:50, 3.34it/s] 36%|███▌ | 133704/371472 [10:37:29<18:44:30, 3.52it/s] 36%|███▌ | 133705/371472 [10:37:29<18:45:10, 3.52it/s] 36%|███▌ | 133706/371472 [10:37:30<18:49:15, 3.51it/s] 36%|███▌ | 133707/371472 [10:37:30<18:58:47, 3.48it/s] 36%|███▌ | 133708/371472 [10:37:30<18:26:33, 3.58it/s] 36%|███▌ | 133709/371472 [10:37:31<18:52:01, 3.50it/s] 36%|███▌ | 133710/371472 [10:37:31<18:58:31, 3.48it/s] 36%|███▌ | 133711/371472 [10:37:31<18:51:42, 3.50it/s] 36%|███▌ | 133712/371472 [10:37:31<18:22:17, 3.59it/s] 36%|███▌ | 133713/371472 [10:37:32<20:10:04, 3.27it/s] 36%|███▌ | 133714/371472 [10:37:32<18:59:50, 3.48it/s] 36%|███▌ | 133715/371472 [10:37:32<18:30:39, 3.57it/s] 36%|███▌ | 133716/371472 [10:37:33<17:59:28, 3.67it/s] 36%|███▌ | 133717/371472 [10:37:33<17:43:20, 3.73it/s] 36%|███▌ | 133718/371472 [10:37:33<17:27:06, 3.78it/s] 36%|███▌ | 133719/371472 [10:37:33<18:58:30, 3.48it/s] 36%|███▌ | 133720/371472 [10:37:34<18:31:11, 3.57it/s] {'loss': 3.2106, 'learning_rate': 6.763343317029025e-07, 'epoch': 5.76} + 36%|███▌ | 133720/371472 [10:37:34<18:31:11, 3.57it/s] 36%|███▌ | 133721/371472 [10:37:34<18:04:43, 3.65it/s] 36%|███▌ | 133722/371472 [10:37:34<17:40:08, 3.74it/s] 36%|███▌ | 133723/371472 [10:37:34<17:31:10, 3.77it/s] 36%|███▌ | 133724/371472 [10:37:35<18:01:43, 3.66it/s] 36%|███▌ | 133725/371472 [10:37:35<17:40:46, 3.74it/s] 36%|███▌ | 133726/371472 [10:37:35<17:39:28, 3.74it/s] 36%|███▌ | 133727/371472 [10:37:36<17:27:56, 3.78it/s] 36%|███▌ | 133728/371472 [10:37:36<17:36:10, 3.75it/s] 36%|███▌ | 133729/371472 [10:37:36<18:38:05, 3.54it/s] 36%|███▌ | 133730/371472 [10:37:36<18:29:15, 3.57it/s] 36%|███▌ | 133731/371472 [10:37:37<17:57:44, 3.68it/s] 36%|███▌ | 133732/371472 [10:37:37<18:17:37, 3.61it/s] 36%|███▌ | 133733/371472 [10:37:37<17:36:08, 3.75it/s] 36%|███▌ | 133734/371472 [10:37:37<17:59:12, 3.67it/s] 36%|███▌ | 133735/371472 [10:37:38<18:42:38, 3.53it/s] 36%|███▌ | 133736/371472 [10:37:38<19:28:05, 3.39it/s] 36%|███▌ | 133737/371472 [10:37:38<19:22:40, 3.41it/s] 36%|███▌ | 133738/371472 [10:37:39<20:05:07, 3.29it/s] 36%|███▌ | 133739/371472 [10:37:39<19:24:46, 3.40it/s] 36%|███▌ | 133740/371472 [10:37:39<19:30:47, 3.38it/s] {'loss': 3.2006, 'learning_rate': 6.762858497274235e-07, 'epoch': 5.76} + 36%|███▌ | 133740/371472 [10:37:39<19:30:47, 3.38it/s] 36%|███▌ | 133741/371472 [10:37:40<20:04:06, 3.29it/s] 36%|███▌ | 133742/371472 [10:37:40<19:10:52, 3.44it/s] 36%|███▌ | 133743/371472 [10:37:40<20:49:03, 3.17it/s] 36%|███▌ | 133744/371472 [10:37:41<21:05:15, 3.13it/s] 36%|███▌ | 133745/371472 [10:37:41<19:35:25, 3.37it/s] 36%|███▌ | 133746/371472 [10:37:41<18:55:06, 3.49it/s] 36%|███▌ | 133747/371472 [10:37:41<18:05:47, 3.65it/s] 36%|███▌ | 133748/371472 [10:37:42<18:13:05, 3.62it/s] 36%|███▌ | 133749/371472 [10:37:42<18:15:17, 3.62it/s] 36%|███▌ | 133750/371472 [10:37:42<18:19:19, 3.60it/s] 36%|███▌ | 133751/371472 [10:37:42<19:00:23, 3.47it/s] 36%|███▌ | 133752/371472 [10:37:43<18:12:33, 3.63it/s] 36%|███▌ | 133753/371472 [10:37:43<17:34:44, 3.76it/s] 36%|███▌ | 133754/371472 [10:37:43<17:32:32, 3.76it/s] 36%|███▌ | 133755/371472 [10:37:44<17:41:15, 3.73it/s] 36%|███▌ | 133756/371472 [10:37:44<17:22:40, 3.80it/s] 36%|███▌ | 133757/371472 [10:37:44<17:46:48, 3.71it/s] 36%|███▌ | 133758/371472 [10:37:44<18:19:13, 3.60it/s] 36%|███▌ | 133759/371472 [10:37:45<17:33:49, 3.76it/s] 36%|███▌ | 133760/371472 [10:37:45<17:39:20, 3.74it/s] {'loss': 3.3177, 'learning_rate': 6.762373677519446e-07, 'epoch': 5.76} + 36%|███▌ | 133760/371472 [10:37:45<17:39:20, 3.74it/s] 36%|███▌ | 133761/371472 [10:37:45<17:55:11, 3.68it/s] 36%|███▌ | 133762/371472 [10:37:45<19:01:48, 3.47it/s] 36%|███▌ | 133763/371472 [10:37:46<21:16:27, 3.10it/s] 36%|███▌ | 133764/371472 [10:37:46<19:59:53, 3.30it/s] 36%|███▌ | 133765/371472 [10:37:46<18:55:52, 3.49it/s] 36%|███▌ | 133766/371472 [10:37:47<19:29:07, 3.39it/s] 36%|███▌ | 133767/371472 [10:37:47<18:19:34, 3.60it/s] 36%|███▌ | 133768/371472 [10:37:47<19:15:44, 3.43it/s] 36%|███▌ | 133769/371472 [10:37:48<19:49:03, 3.33it/s] 36%|███▌ | 133770/371472 [10:37:48<19:19:19, 3.42it/s] 36%|███▌ | 133771/371472 [10:37:48<19:51:54, 3.32it/s] 36%|███▌ | 133772/371472 [10:37:48<19:33:56, 3.37it/s] 36%|███▌ | 133773/371472 [10:37:49<21:38:19, 3.05it/s] 36%|███▌ | 133774/371472 [10:37:49<20:33:47, 3.21it/s] 36%|███▌ | 133775/371472 [10:37:49<21:16:25, 3.10it/s] 36%|███▌ | 133776/371472 [10:37:50<21:40:18, 3.05it/s] 36%|███▌ | 133777/371472 [10:37:50<20:46:39, 3.18it/s] 36%|███▌ | 133778/371472 [10:37:50<20:08:19, 3.28it/s] 36%|███▌ | 133779/371472 [10:37:51<19:07:38, 3.45it/s] 36%|███▌ | 133780/371472 [10:37:51<18:11:30, 3.63it/s] {'loss': 3.1612, 'learning_rate': 6.761888857764657e-07, 'epoch': 5.76} + 36%|███▌ | 133780/371472 [10:37:51<18:11:30, 3.63it/s] 36%|███▌ | 133781/371472 [10:37:51<19:08:26, 3.45it/s] 36%|███▌ | 133782/371472 [10:37:51<18:23:40, 3.59it/s] 36%|███▌ | 133783/371472 [10:37:52<19:37:20, 3.36it/s] 36%|███▌ | 133784/371472 [10:37:52<18:42:00, 3.53it/s] 36%|███▌ | 133785/371472 [10:37:52<18:17:14, 3.61it/s] 36%|███▌ | 133786/371472 [10:37:53<17:54:30, 3.69it/s] 36%|███▌ | 133787/371472 [10:37:53<17:49:25, 3.70it/s] 36%|███▌ | 133788/371472 [10:37:53<17:31:47, 3.77it/s] 36%|███▌ | 133789/371472 [10:37:53<17:57:42, 3.68it/s] 36%|███▌ | 133790/371472 [10:37:54<19:19:03, 3.42it/s] 36%|███▌ | 133791/371472 [10:37:54<18:09:30, 3.64it/s] 36%|███▌ | 133792/371472 [10:37:54<17:44:26, 3.72it/s] 36%|███▌ | 133793/371472 [10:37:55<20:20:12, 3.25it/s] 36%|███▌ | 133794/371472 [10:37:55<18:58:04, 3.48it/s] 36%|███▌ | 133795/371472 [10:37:55<18:28:36, 3.57it/s] 36%|███▌ | 133796/371472 [10:37:55<17:44:22, 3.72it/s] 36%|███▌ | 133797/371472 [10:37:56<17:46:32, 3.71it/s] 36%|███▌ | 133798/371472 [10:37:56<17:18:24, 3.81it/s] 36%|███▌ | 133799/371472 [10:37:56<17:37:15, 3.75it/s] 36%|███▌ | 133800/371472 [10:37:56<19:11:52, 3.44it/s] {'loss': 3.2061, 'learning_rate': 6.761404038009868e-07, 'epoch': 5.76} + 36%|███▌ | 133800/371472 [10:37:56<19:11:52, 3.44it/s] 36%|███▌ | 133801/371472 [10:37:57<19:06:26, 3.46it/s] 36%|███▌ | 133802/371472 [10:37:57<18:47:24, 3.51it/s] 36%|███▌ | 133803/371472 [10:37:57<18:08:22, 3.64it/s] 36%|███▌ | 133804/371472 [10:37:58<17:53:13, 3.69it/s] 36%|███▌ | 133805/371472 [10:37:58<17:17:32, 3.82it/s] 36%|███▌ | 133806/371472 [10:37:58<17:33:09, 3.76it/s] 36%|███▌ | 133807/371472 [10:37:58<17:25:34, 3.79it/s] 36%|███▌ | 133808/371472 [10:37:59<16:56:44, 3.90it/s] 36%|███▌ | 133809/371472 [10:37:59<17:33:56, 3.76it/s] 36%|███▌ | 133810/371472 [10:37:59<17:36:07, 3.75it/s] 36%|███▌ | 133811/371472 [10:37:59<18:37:11, 3.55it/s] 36%|███▌ | 133812/371472 [10:38:00<19:00:06, 3.47it/s] 36%|███▌ | 133813/371472 [10:38:00<18:33:00, 3.56it/s] 36%|███▌ | 133814/371472 [10:38:00<19:37:16, 3.36it/s] 36%|███▌ | 133815/371472 [10:38:01<19:28:16, 3.39it/s] 36%|███▌ | 133816/371472 [10:38:01<18:59:05, 3.48it/s] 36%|███▌ | 133817/371472 [10:38:01<18:23:11, 3.59it/s] 36%|███▌ | 133818/371472 [10:38:01<18:04:34, 3.65it/s] 36%|███▌ | 133819/371472 [10:38:02<17:32:28, 3.76it/s] 36%|███▌ | 133820/371472 [10:38:02<17:13:48, 3.83it/s] {'loss': 3.2408, 'learning_rate': 6.76091921825508e-07, 'epoch': 5.76} + 36%|███▌ | 133820/371472 [10:38:02<17:13:48, 3.83it/s] 36%|███▌ | 133821/371472 [10:38:02<17:19:37, 3.81it/s] 36%|███▌ | 133822/371472 [10:38:02<17:48:54, 3.71it/s] 36%|███▌ | 133823/371472 [10:38:03<17:33:31, 3.76it/s] 36%|███▌ | 133824/371472 [10:38:03<17:18:15, 3.81it/s] 36%|███▌ | 133825/371472 [10:38:03<17:10:38, 3.84it/s] 36%|███▌ | 133826/371472 [10:38:04<18:35:05, 3.55it/s] 36%|███▌ | 133827/371472 [10:38:04<18:18:23, 3.61it/s] 36%|███▌ | 133828/371472 [10:38:04<18:05:10, 3.65it/s] 36%|███▌ | 133829/371472 [10:38:04<18:02:51, 3.66it/s] 36%|███▌ | 133830/371472 [10:38:05<17:25:31, 3.79it/s] 36%|███▌ | 133831/371472 [10:38:05<17:22:06, 3.80it/s] 36%|███▌ | 133832/371472 [10:38:05<17:37:50, 3.74it/s] 36%|███▌ | 133833/371472 [10:38:05<17:12:01, 3.84it/s] 36%|███▌ | 133834/371472 [10:38:06<17:04:49, 3.86it/s] 36%|███▌ | 133835/371472 [10:38:06<17:07:50, 3.85it/s] 36%|███▌ | 133836/371472 [10:38:06<17:14:02, 3.83it/s] 36%|███▌ | 133837/371472 [10:38:07<18:29:23, 3.57it/s] 36%|███▌ | 133838/371472 [10:38:07<18:29:07, 3.57it/s] 36%|███▌ | 133839/371472 [10:38:07<18:10:14, 3.63it/s] 36%|███▌ | 133840/371472 [10:38:07<18:40:43, 3.53it/s] {'loss': 3.0431, 'learning_rate': 6.760434398500291e-07, 'epoch': 5.76} + 36%|███▌ | 133840/371472 [10:38:07<18:40:43, 3.53it/s] 36%|███▌ | 133841/371472 [10:38:08<18:57:43, 3.48it/s] 36%|███▌ | 133842/371472 [10:38:08<18:41:51, 3.53it/s] 36%|███▌ | 133843/371472 [10:38:08<19:15:20, 3.43it/s] 36%|███▌ | 133844/371472 [10:38:08<18:08:40, 3.64it/s] 36%|███▌ | 133845/371472 [10:38:09<18:13:48, 3.62it/s] 36%|███▌ | 133846/371472 [10:38:09<19:01:45, 3.47it/s] 36%|███▌ | 133847/371472 [10:38:09<19:51:25, 3.32it/s] 36%|███▌ | 133848/371472 [10:38:10<19:08:59, 3.45it/s] 36%|███▌ | 133849/371472 [10:38:10<18:56:10, 3.49it/s] 36%|███▌ | 133850/371472 [10:38:10<18:17:47, 3.61it/s] 36%|███▌ | 133851/371472 [10:38:10<18:16:26, 3.61it/s] 36%|███▌ | 133852/371472 [10:38:11<17:36:11, 3.75it/s] 36%|███▌ | 133853/371472 [10:38:11<17:13:28, 3.83it/s] 36%|███▌ | 133854/371472 [10:38:11<16:59:36, 3.88it/s] 36%|███▌ | 133855/371472 [10:38:11<16:58:58, 3.89it/s] 36%|███▌ | 133856/371472 [10:38:12<16:55:12, 3.90it/s] 36%|███▌ | 133857/371472 [10:38:12<17:29:22, 3.77it/s] 36%|███▌ | 133858/371472 [10:38:12<18:20:42, 3.60it/s] 36%|███▌ | 133859/371472 [10:38:13<19:37:15, 3.36it/s] 36%|███▌ | 133860/371472 [10:38:13<21:00:22, 3.14it/s] {'loss': 3.1857, 'learning_rate': 6.759949578745501e-07, 'epoch': 5.77} + 36%|███▌ | 133860/371472 [10:38:13<21:00:22, 3.14it/s] 36%|███▌ | 133861/371472 [10:38:13<19:55:07, 3.31it/s] 36%|███▌ | 133862/371472 [10:38:14<18:41:33, 3.53it/s] 36%|███▌ | 133863/371472 [10:38:14<18:03:05, 3.66it/s] 36%|███▌ | 133864/371472 [10:38:14<18:24:43, 3.58it/s] 36%|███▌ | 133865/371472 [10:38:14<18:23:16, 3.59it/s] 36%|███▌ | 133866/371472 [10:38:15<17:52:58, 3.69it/s] 36%|███▌ | 133867/371472 [10:38:15<17:09:19, 3.85it/s] 36%|███▌ | 133868/371472 [10:38:15<17:05:12, 3.86it/s] 36%|███▌ | 133869/371472 [10:38:15<17:39:20, 3.74it/s] 36%|███▌ | 133870/371472 [10:38:16<17:13:59, 3.83it/s] 36%|███▌ | 133871/371472 [10:38:16<17:54:31, 3.69it/s] 36%|███▌ | 133872/371472 [10:38:16<17:36:33, 3.75it/s] 36%|███▌ | 133873/371472 [10:38:17<18:50:40, 3.50it/s] 36%|███▌ | 133874/371472 [10:38:17<18:45:57, 3.52it/s] 36%|███▌ | 133875/371472 [10:38:17<18:12:12, 3.63it/s] 36%|███▌ | 133876/371472 [10:38:17<18:08:32, 3.64it/s] 36%|███▌ | 133877/371472 [10:38:18<17:44:21, 3.72it/s] 36%|███▌ | 133878/371472 [10:38:18<17:17:15, 3.82it/s] 36%|███▌ | 133879/371472 [10:38:18<17:59:23, 3.67it/s] 36%|███▌ | 133880/371472 [10:38:18<18:18:18, 3.61it/s] {'loss': 3.0534, 'learning_rate': 6.759464758990712e-07, 'epoch': 5.77} + 36%|███▌ | 133880/371472 [10:38:18<18:18:18, 3.61it/s] 36%|███▌ | 133881/371472 [10:38:19<18:59:41, 3.47it/s] 36%|███▌ | 133882/371472 [10:38:19<19:22:12, 3.41it/s] 36%|███▌ | 133883/371472 [10:38:19<19:20:27, 3.41it/s] 36%|███▌ | 133884/371472 [10:38:20<20:28:44, 3.22it/s] 36%|███▌ | 133885/371472 [10:38:20<18:57:35, 3.48it/s] 36%|███▌ | 133886/371472 [10:38:20<19:36:25, 3.37it/s] 36%|███▌ | 133887/371472 [10:38:21<19:23:11, 3.40it/s] 36%|███▌ | 133888/371472 [10:38:21<18:53:11, 3.49it/s] 36%|███▌ | 133889/371472 [10:38:21<18:43:45, 3.52it/s] 36%|███▌ | 133890/371472 [10:38:21<19:25:51, 3.40it/s] 36%|███▌ | 133891/371472 [10:38:22<20:21:57, 3.24it/s] 36%|███▌ | 133892/371472 [10:38:22<20:23:23, 3.24it/s] 36%|███▌ | 133893/371472 [10:38:22<18:59:22, 3.48it/s] 36%|███▌ | 133894/371472 [10:38:23<18:19:14, 3.60it/s] 36%|███▌ | 133895/371472 [10:38:23<19:19:58, 3.41it/s] 36%|███▌ | 133896/371472 [10:38:23<19:03:09, 3.46it/s] 36%|███▌ | 133897/371472 [10:38:23<18:11:58, 3.63it/s] 36%|███▌ | 133898/371472 [10:38:24<20:11:15, 3.27it/s] 36%|███▌ | 133899/371472 [10:38:24<19:31:04, 3.38it/s] 36%|███▌ | 133900/371472 [10:38:24<18:41:10, 3.53it/s] {'loss': 3.2721, 'learning_rate': 6.758979939235925e-07, 'epoch': 5.77} + 36%|███▌ | 133900/371472 [10:38:24<18:41:10, 3.53it/s] 36%|███▌ | 133901/371472 [10:38:25<18:24:22, 3.59it/s] 36%|███▌ | 133902/371472 [10:38:25<17:40:03, 3.74it/s] 36%|███▌ | 133903/371472 [10:38:25<17:36:08, 3.75it/s] 36%|███▌ | 133904/371472 [10:38:25<17:59:29, 3.67it/s] 36%|███▌ | 133905/371472 [10:38:26<18:59:58, 3.47it/s] 36%|███▌ | 133906/371472 [10:38:26<18:17:35, 3.61it/s] 36%|███▌ | 133907/371472 [10:38:26<17:50:13, 3.70it/s] 36%|███▌ | 133908/371472 [10:38:26<18:45:19, 3.52it/s] 36%|███▌ | 133909/371472 [10:38:27<18:11:32, 3.63it/s] 36%|███▌ | 133910/371472 [10:38:27<17:34:47, 3.75it/s] 36%|███▌ | 133911/371472 [10:38:27<16:52:52, 3.91it/s] 36%|███▌ | 133912/371472 [10:38:28<18:42:00, 3.53it/s] 36%|███▌ | 133913/371472 [10:38:28<18:23:49, 3.59it/s] 36%|███▌ | 133914/371472 [10:38:28<17:55:27, 3.68it/s] 36%|███▌ | 133915/371472 [10:38:28<17:38:50, 3.74it/s] 36%|███▌ | 133916/371472 [10:38:29<18:02:42, 3.66it/s] 36%|███▌ | 133917/371472 [10:38:29<18:14:04, 3.62it/s] 36%|███▌ | 133918/371472 [10:38:29<19:18:08, 3.42it/s] 36%|███▌ | 133919/371472 [10:38:30<18:53:32, 3.49it/s] 36%|███▌ | 133920/371472 [10:38:30<17:59:42, 3.67it/s] {'loss': 3.2464, 'learning_rate': 6.758495119481135e-07, 'epoch': 5.77} + 36%|███▌ | 133920/371472 [10:38:30<17:59:42, 3.67it/s] 36%|███▌ | 133921/371472 [10:38:30<17:44:15, 3.72it/s] 36%|███▌ | 133922/371472 [10:38:30<17:40:57, 3.73it/s] 36%|███▌ | 133923/371472 [10:38:31<18:17:20, 3.61it/s] 36%|███▌ | 133924/371472 [10:38:31<18:18:41, 3.60it/s] 36%|███▌ | 133925/371472 [10:38:31<17:55:21, 3.68it/s] 36%|███▌ | 133926/371472 [10:38:31<17:33:37, 3.76it/s] 36%|███▌ | 133927/371472 [10:38:32<17:19:59, 3.81it/s] 36%|███▌ | 133928/371472 [10:38:32<17:48:06, 3.71it/s] 36%|███▌ | 133929/371472 [10:38:32<17:58:28, 3.67it/s] 36%|███▌ | 133930/371472 [10:38:33<18:53:38, 3.49it/s] 36%|███▌ | 133931/371472 [10:38:33<18:39:12, 3.54it/s] 36%|███▌ | 133932/371472 [10:38:33<18:36:31, 3.55it/s] 36%|███▌ | 133933/371472 [10:38:33<17:58:29, 3.67it/s] 36%|███▌ | 133934/371472 [10:38:34<19:16:39, 3.42it/s] 36%|███▌ | 133935/371472 [10:38:34<19:13:55, 3.43it/s] 36%|███▌ | 133936/371472 [10:38:34<19:19:24, 3.41it/s] 36%|███▌ | 133937/371472 [10:38:35<20:18:57, 3.25it/s] 36%|███▌ | 133938/371472 [10:38:35<20:18:01, 3.25it/s] 36%|███▌ | 133939/371472 [10:38:35<19:34:43, 3.37it/s] 36%|███▌ | 133940/371472 [10:38:35<18:47:11, 3.51it/s] {'loss': 3.0732, 'learning_rate': 6.758010299726346e-07, 'epoch': 5.77} + 36%|███▌ | 133940/371472 [10:38:35<18:47:11, 3.51it/s] 36%|███▌ | 133941/371472 [10:38:36<19:42:05, 3.35it/s] 36%|███▌ | 133942/371472 [10:38:36<19:06:34, 3.45it/s] 36%|███▌ | 133943/371472 [10:38:36<20:06:02, 3.28it/s] 36%|███▌ | 133944/371472 [10:38:37<19:41:07, 3.35it/s] 36%|███▌ | 133945/371472 [10:38:37<19:08:52, 3.45it/s] 36%|███▌ | 133946/371472 [10:38:37<19:10:51, 3.44it/s] 36%|███▌ | 133947/371472 [10:38:37<19:26:26, 3.39it/s] 36%|███▌ | 133948/371472 [10:38:38<19:01:47, 3.47it/s] 36%|███▌ | 133949/371472 [10:38:38<19:06:40, 3.45it/s] 36%|███▌ | 133950/371472 [10:38:38<19:08:50, 3.45it/s] 36%|███▌ | 133951/371472 [10:38:39<18:27:16, 3.58it/s] 36%|███▌ | 133952/371472 [10:38:39<18:26:58, 3.58it/s] 36%|███▌ | 133953/371472 [10:38:39<18:21:18, 3.59it/s] 36%|███▌ | 133954/371472 [10:38:39<18:40:26, 3.53it/s] 36%|███▌ | 133955/371472 [10:38:40<18:15:17, 3.61it/s] 36%|███▌ | 133956/371472 [10:38:40<18:43:34, 3.52it/s] 36%|███▌ | 133957/371472 [10:38:40<18:13:31, 3.62it/s] 36%|███▌ | 133958/371472 [10:38:41<18:15:53, 3.61it/s] 36%|███▌ | 133959/371472 [10:38:41<19:19:31, 3.41it/s] 36%|███▌ | 133960/371472 [10:38:41<19:43:16, 3.35it/s] {'loss': 3.1995, 'learning_rate': 6.757525479971557e-07, 'epoch': 5.77} + 36%|███▌ | 133960/371472 [10:38:41<19:43:16, 3.35it/s] 36%|███▌ | 133961/371472 [10:38:41<19:11:52, 3.44it/s] 36%|███▌ | 133962/371472 [10:38:42<18:40:08, 3.53it/s] 36%|███▌ | 133963/371472 [10:38:42<19:01:07, 3.47it/s] 36%|███▌ | 133964/371472 [10:38:42<18:50:01, 3.50it/s] 36%|███▌ | 133965/371472 [10:38:43<18:17:40, 3.61it/s] 36%|███▌ | 133966/371472 [10:38:43<18:09:58, 3.63it/s] 36%|███▌ | 133967/371472 [10:38:43<17:40:28, 3.73it/s] 36%|███▌ | 133968/371472 [10:38:43<17:25:33, 3.79it/s] 36%|███▌ | 133969/371472 [10:38:44<17:16:02, 3.82it/s] 36%|███▌ | 133970/371472 [10:38:44<20:16:15, 3.25it/s] 36%|███▌ | 133971/371472 [10:38:44<19:25:01, 3.40it/s] 36%|███▌ | 133972/371472 [10:38:45<19:09:46, 3.44it/s] 36%|███▌ | 133973/371472 [10:38:45<18:20:23, 3.60it/s] 36%|███▌ | 133974/371472 [10:38:45<19:41:03, 3.35it/s] 36%|███▌ | 133975/371472 [10:38:45<20:00:32, 3.30it/s] 36%|███▌ | 133976/371472 [10:38:46<19:16:01, 3.42it/s] 36%|███▌ | 133977/371472 [10:38:46<19:19:57, 3.41it/s] 36%|███▌ | 133978/371472 [10:38:46<18:57:20, 3.48it/s] 36%|███▌ | 133979/371472 [10:38:47<18:13:23, 3.62it/s] 36%|███▌ | 133980/371472 [10:38:47<17:53:08, 3.69it/s] {'loss': 3.104, 'learning_rate': 6.757040660216768e-07, 'epoch': 5.77} + 36%|███▌ | 133980/371472 [10:38:47<17:53:08, 3.69it/s] 36%|███▌ | 133981/371472 [10:38:47<17:53:54, 3.69it/s] 36%|███▌ | 133982/371472 [10:38:47<17:18:31, 3.81it/s] 36%|███▌ | 133983/371472 [10:38:48<17:00:18, 3.88it/s] 36%|███▌ | 133984/371472 [10:38:48<16:53:48, 3.90it/s] 36%|███▌ | 133985/371472 [10:38:48<18:35:25, 3.55it/s] 36%|███▌ | 133986/371472 [10:38:48<18:09:59, 3.63it/s] 36%|███▌ | 133987/371472 [10:38:49<18:40:44, 3.53it/s] 36%|███▌ | 133988/371472 [10:38:49<18:52:28, 3.50it/s] 36%|███▌ | 133989/371472 [10:38:49<17:48:37, 3.70it/s] 36%|███▌ | 133990/371472 [10:38:50<18:26:26, 3.58it/s] 36%|███▌ | 133991/371472 [10:38:50<18:03:16, 3.65it/s] 36%|███▌ | 133992/371472 [10:38:50<18:39:39, 3.54it/s] 36%|███▌ | 133993/371472 [10:38:50<18:31:48, 3.56it/s] 36%|███▌ | 133994/371472 [10:38:51<18:37:04, 3.54it/s] 36%|███▌ | 133995/371472 [10:38:51<17:52:19, 3.69it/s] 36%|███▌ | 133996/371472 [10:38:51<17:16:15, 3.82it/s] 36%|███▌ | 133997/371472 [10:38:51<16:53:43, 3.90it/s] 36%|███▌ | 133998/371472 [10:38:52<18:01:23, 3.66it/s] 36%|███▌ | 133999/371472 [10:38:52<18:24:04, 3.58it/s] 36%|███▌ | 134000/371472 [10:38:52<19:09:37, 3.44it/s] {'loss': 3.1425, 'learning_rate': 6.756555840461979e-07, 'epoch': 5.77} + 36%|███▌ | 134000/371472 [10:38:52<19:09:37, 3.44it/s] 36%|███▌ | 134001/371472 [10:38:53<18:18:14, 3.60it/s] 36%|███▌ | 134002/371472 [10:38:53<18:11:00, 3.63it/s] 36%|███▌ | 134003/371472 [10:38:53<18:40:30, 3.53it/s] 36%|███▌ | 134004/371472 [10:38:53<18:25:47, 3.58it/s] 36%|███▌ | 134005/371472 [10:38:54<17:51:08, 3.69it/s] 36%|███▌ | 134006/371472 [10:38:54<17:27:43, 3.78it/s] 36%|███▌ | 134007/371472 [10:38:54<16:54:09, 3.90it/s] 36%|███▌ | 134008/371472 [10:38:54<16:45:46, 3.93it/s] 36%|███▌ | 134009/371472 [10:38:55<18:00:34, 3.66it/s] 36%|███▌ | 134010/371472 [10:38:55<19:35:44, 3.37it/s] 36%|███▌ | 134011/371472 [10:38:55<19:47:34, 3.33it/s] 36%|███▌ | 134012/371472 [10:38:56<18:52:09, 3.50it/s] 36%|███▌ | 134013/371472 [10:38:56<18:36:13, 3.55it/s] 36%|███▌ | 134014/371472 [10:38:56<17:58:39, 3.67it/s] 36%|███▌ | 134015/371472 [10:38:56<17:38:07, 3.74it/s] 36%|███▌ | 134016/371472 [10:38:57<17:48:34, 3.70it/s] 36%|███▌ | 134017/371472 [10:38:57<17:26:19, 3.78it/s] 36%|███▌ | 134018/371472 [10:38:57<17:07:26, 3.85it/s] 36%|███▌ | 134019/371472 [10:38:58<19:29:53, 3.38it/s] 36%|███▌ | 134020/371472 [10:38:58<18:37:14, 3.54it/s] {'loss': 3.1767, 'learning_rate': 6.75607102070719e-07, 'epoch': 5.77} + 36%|███▌ | 134020/371472 [10:38:58<18:37:14, 3.54it/s] 36%|███▌ | 134021/371472 [10:38:58<18:43:59, 3.52it/s] 36%|███▌ | 134022/371472 [10:38:58<18:52:25, 3.49it/s] 36%|███▌ | 134023/371472 [10:38:59<17:58:20, 3.67it/s] 36%|███▌ | 134024/371472 [10:38:59<18:43:09, 3.52it/s] 36%|███▌ | 134025/371472 [10:38:59<18:01:14, 3.66it/s] 36%|███▌ | 134026/371472 [10:39:00<18:26:44, 3.58it/s] 36%|███▌ | 134027/371472 [10:39:00<19:02:17, 3.46it/s] 36%|███▌ | 134028/371472 [10:39:00<18:52:44, 3.49it/s] 36%|███▌ | 134029/371472 [10:39:00<18:24:34, 3.58it/s] 36%|███▌ | 134030/371472 [10:39:01<18:27:15, 3.57it/s] 36%|███▌ | 134031/371472 [10:39:01<17:58:21, 3.67it/s] 36%|███▌ | 134032/371472 [10:39:01<18:44:51, 3.52it/s] 36%|███▌ | 134033/371472 [10:39:01<18:28:41, 3.57it/s] 36%|███▌ | 134034/371472 [10:39:02<17:54:44, 3.68it/s] 36%|███▌ | 134035/371472 [10:39:02<17:19:16, 3.81it/s] 36%|███▌ | 134036/371472 [10:39:02<18:21:55, 3.59it/s] 36%|███▌ | 134037/371472 [10:39:03<18:05:54, 3.64it/s] 36%|███▌ | 134038/371472 [10:39:03<17:54:57, 3.68it/s] 36%|███▌ | 134039/371472 [10:39:03<17:39:02, 3.74it/s] 36%|███▌ | 134040/371472 [10:39:03<17:45:54, 3.71it/s] {'loss': 3.1568, 'learning_rate': 6.755586200952401e-07, 'epoch': 5.77} + 36%|███▌ | 134040/371472 [10:39:03<17:45:54, 3.71it/s] 36%|███▌ | 134041/371472 [10:39:04<17:44:17, 3.72it/s] 36%|███▌ | 134042/371472 [10:39:04<17:13:49, 3.83it/s] 36%|███▌ | 134043/371472 [10:39:04<17:38:24, 3.74it/s] 36%|███▌ | 134044/371472 [10:39:04<17:56:21, 3.68it/s] 36%|███▌ | 134045/371472 [10:39:05<19:51:58, 3.32it/s] 36%|███▌ | 134046/371472 [10:39:05<18:56:04, 3.48it/s] 36%|███▌ | 134047/371472 [10:39:05<19:29:25, 3.38it/s] 36%|███▌ | 134048/371472 [10:39:06<18:22:13, 3.59it/s] 36%|███▌ | 134049/371472 [10:39:06<18:47:39, 3.51it/s] 36%|███▌ | 134050/371472 [10:39:06<18:46:10, 3.51it/s] 36%|███▌ | 134051/371472 [10:39:06<18:01:31, 3.66it/s] 36%|███▌ | 134052/371472 [10:39:07<17:54:02, 3.68it/s] 36%|███▌ | 134053/371472 [10:39:07<18:22:19, 3.59it/s] 36%|███▌ | 134054/371472 [10:39:07<18:53:19, 3.49it/s] 36%|███▌ | 134055/371472 [10:39:08<20:23:49, 3.23it/s] 36%|███▌ | 134056/371472 [10:39:08<19:34:22, 3.37it/s] 36%|███▌ | 134057/371472 [10:39:08<18:57:19, 3.48it/s] 36%|███▌ | 134058/371472 [10:39:08<19:11:29, 3.44it/s] 36%|███▌ | 134059/371472 [10:39:09<19:51:51, 3.32it/s] 36%|███▌ | 134060/371472 [10:39:09<19:01:25, 3.47it/s] {'loss': 3.164, 'learning_rate': 6.755101381197613e-07, 'epoch': 5.77} + 36%|███▌ | 134060/371472 [10:39:09<19:01:25, 3.47it/s] 36%|███▌ | 134061/371472 [10:39:09<19:51:50, 3.32it/s] 36%|███▌ | 134062/371472 [10:39:10<19:16:20, 3.42it/s] 36%|███▌ | 134063/371472 [10:39:10<18:45:23, 3.52it/s] 36%|███▌ | 134064/371472 [10:39:10<18:42:05, 3.53it/s] 36%|███▌ | 134065/371472 [10:39:10<18:19:04, 3.60it/s] 36%|███▌ | 134066/371472 [10:39:11<20:11:24, 3.27it/s] 36%|███▌ | 134067/371472 [10:39:11<20:31:22, 3.21it/s] 36%|███▌ | 134068/371472 [10:39:11<19:42:21, 3.35it/s] 36%|███▌ | 134069/371472 [10:39:12<19:07:22, 3.45it/s] 36%|███▌ | 134070/371472 [10:39:12<18:36:20, 3.54it/s] 36%|███▌ | 134071/371472 [10:39:12<19:33:54, 3.37it/s] 36%|███▌ | 134072/371472 [10:39:13<19:03:07, 3.46it/s] 36%|███▌ | 134073/371472 [10:39:13<20:02:47, 3.29it/s] 36%|███▌ | 134074/371472 [10:39:13<18:59:41, 3.47it/s] 36%|███▌ | 134075/371472 [10:39:13<18:28:21, 3.57it/s] 36%|███▌ | 134076/371472 [10:39:14<18:43:25, 3.52it/s] 36%|███▌ | 134077/371472 [10:39:14<18:32:21, 3.56it/s] 36%|███▌ | 134078/371472 [10:39:14<18:44:25, 3.52it/s] 36%|███▌ | 134079/371472 [10:39:15<17:48:18, 3.70it/s] 36%|███▌ | 134080/371472 [10:39:15<18:06:12, 3.64it/s] {'loss': 3.1444, 'learning_rate': 6.754616561442823e-07, 'epoch': 5.78} + 36%|███▌ | 134080/371472 [10:39:15<18:06:12, 3.64it/s] 36%|███▌ | 134081/371472 [10:39:15<18:23:54, 3.58it/s] 36%|███▌ | 134082/371472 [10:39:15<18:16:47, 3.61it/s] 36%|███▌ | 134083/371472 [10:39:16<18:49:10, 3.50it/s] 36%|███▌ | 134084/371472 [10:39:16<19:51:48, 3.32it/s] 36%|███▌ | 134085/371472 [10:39:16<19:13:48, 3.43it/s] 36%|███▌ | 134086/371472 [10:39:17<18:32:41, 3.56it/s] 36%|███▌ | 134087/371472 [10:39:17<18:31:58, 3.56it/s] 36%|███▌ | 134088/371472 [10:39:17<19:10:50, 3.44it/s] 36%|███▌ | 134089/371472 [10:39:17<18:23:01, 3.59it/s] 36%|███▌ | 134090/371472 [10:39:18<18:45:19, 3.52it/s] 36%|███▌ | 134091/371472 [10:39:18<18:06:23, 3.64it/s] 36%|███▌ | 134092/371472 [10:39:18<18:15:13, 3.61it/s] 36%|███▌ | 134093/371472 [10:39:18<17:22:41, 3.79it/s] 36%|███▌ | 134094/371472 [10:39:19<17:57:21, 3.67it/s] 36%|███▌ | 134095/371472 [10:39:19<17:39:37, 3.73it/s] 36%|███▌ | 134096/371472 [10:39:19<18:30:52, 3.56it/s] 36%|███▌ | 134097/371472 [10:39:20<19:01:52, 3.46it/s] 36%|███▌ | 134098/371472 [10:39:20<19:29:55, 3.38it/s] 36%|███▌ | 134099/371472 [10:39:20<18:33:48, 3.55it/s] 36%|███▌ | 134100/371472 [10:39:20<18:04:29, 3.65it/s] {'loss': 3.2324, 'learning_rate': 6.754131741688035e-07, 'epoch': 5.78} + 36%|███▌ | 134100/371472 [10:39:20<18:04:29, 3.65it/s] 36%|███▌ | 134101/371472 [10:39:21<18:05:13, 3.65it/s] 36%|███▌ | 134102/371472 [10:39:21<18:16:00, 3.61it/s] 36%|███▌ | 134103/371472 [10:39:21<17:22:50, 3.79it/s] 36%|███▌ | 134104/371472 [10:39:22<17:15:00, 3.82it/s] 36%|███▌ | 134105/371472 [10:39:22<17:56:40, 3.67it/s] 36%|███▌ | 134106/371472 [10:39:22<20:45:29, 3.18it/s] 36%|███▌ | 134107/371472 [10:39:22<19:49:15, 3.33it/s] 36%|███▌ | 134108/371472 [10:39:23<19:15:47, 3.42it/s] 36%|███▌ | 134109/371472 [10:39:23<18:47:01, 3.51it/s] 36%|███▌ | 134110/371472 [10:39:23<19:48:05, 3.33it/s] 36%|███▌ | 134111/371472 [10:39:24<18:33:59, 3.55it/s] 36%|███▌ | 134112/371472 [10:39:24<18:45:27, 3.51it/s] 36%|███▌ | 134113/371472 [10:39:24<18:44:35, 3.52it/s] 36%|███▌ | 134114/371472 [10:39:25<20:17:28, 3.25it/s] 36%|███▌ | 134115/371472 [10:39:25<19:20:29, 3.41it/s] 36%|███▌ | 134116/371472 [10:39:25<19:06:31, 3.45it/s] 36%|███▌ | 134117/371472 [10:39:25<17:58:04, 3.67it/s] 36%|███▌ | 134118/371472 [10:39:26<18:12:28, 3.62it/s] 36%|███▌ | 134119/371472 [10:39:26<18:27:02, 3.57it/s] 36%|███▌ | 134120/371472 [10:39:26<18:32:01, 3.56it/s] {'loss': 3.1375, 'learning_rate': 6.753646921933245e-07, 'epoch': 5.78} + 36%|███▌ | 134120/371472 [10:39:26<18:32:01, 3.56it/s] 36%|███▌ | 134121/371472 [10:39:26<17:45:37, 3.71it/s] 36%|███▌ | 134122/371472 [10:39:27<17:33:15, 3.76it/s] 36%|███▌ | 134123/371472 [10:39:27<18:31:51, 3.56it/s] 36%|███▌ | 134124/371472 [10:39:27<17:52:48, 3.69it/s] 36%|███▌ | 134125/371472 [10:39:28<18:17:25, 3.60it/s] 36%|███▌ | 134126/371472 [10:39:28<18:42:38, 3.52it/s] 36%|███▌ | 134127/371472 [10:39:28<19:02:52, 3.46it/s] 36%|███▌ | 134128/371472 [10:39:28<19:19:30, 3.41it/s] 36%|███▌ | 134129/371472 [10:39:29<19:50:49, 3.32it/s] 36%|███▌ | 134130/371472 [10:39:29<19:51:08, 3.32it/s] 36%|███▌ | 134131/371472 [10:39:29<18:58:14, 3.48it/s] 36%|███▌ | 134132/371472 [10:39:30<18:55:40, 3.48it/s] 36%|███▌ | 134133/371472 [10:39:30<18:48:00, 3.51it/s] 36%|███▌ | 134134/371472 [10:39:30<19:13:45, 3.43it/s] 36%|███▌ | 134135/371472 [10:39:30<19:00:11, 3.47it/s] 36%|███▌ | 134136/371472 [10:39:31<19:48:48, 3.33it/s] 36%|███▌ | 134137/371472 [10:39:31<19:06:27, 3.45it/s] 36%|███▌ | 134138/371472 [10:39:31<18:39:43, 3.53it/s] 36%|███▌ | 134139/371472 [10:39:32<18:00:00, 3.66it/s] 36%|███▌ | 134140/371472 [10:39:32<17:06:31, 3.85it/s] {'loss': 2.9544, 'learning_rate': 6.753162102178456e-07, 'epoch': 5.78} + 36%|███▌ | 134140/371472 [10:39:32<17:06:31, 3.85it/s] 36%|███▌ | 134141/371472 [10:39:32<17:12:51, 3.83it/s] 36%|███▌ | 134142/371472 [10:39:32<17:04:49, 3.86it/s] 36%|███▌ | 134143/371472 [10:39:33<16:59:32, 3.88it/s] 36%|███▌ | 134144/371472 [10:39:33<16:53:01, 3.90it/s] 36%|███▌ | 134145/371472 [10:39:33<18:37:12, 3.54it/s] 36%|███▌ | 134146/371472 [10:39:33<18:13:54, 3.62it/s] 36%|███▌ | 134147/371472 [10:39:34<18:22:38, 3.59it/s] 36%|███▌ | 134148/371472 [10:39:34<18:15:26, 3.61it/s] 36%|███▌ | 134149/371472 [10:39:34<18:05:34, 3.64it/s] 36%|███▌ | 134150/371472 [10:39:35<19:01:38, 3.46it/s] 36%|███▌ | 134151/371472 [10:39:35<20:41:16, 3.19it/s] 36%|███▌ | 134152/371472 [10:39:35<19:59:04, 3.30it/s] 36%|███▌ | 134153/371472 [10:39:36<20:24:06, 3.23it/s] 36%|███▌ | 134154/371472 [10:39:36<19:25:00, 3.40it/s] 36%|███▌ | 134155/371472 [10:39:36<18:25:50, 3.58it/s] 36%|███▌ | 134156/371472 [10:39:36<18:14:49, 3.61it/s] 36%|███▌ | 134157/371472 [10:39:37<18:34:57, 3.55it/s] 36%|███▌ | 134158/371472 [10:39:37<18:38:10, 3.54it/s] 36%|███▌ | 134159/371472 [10:39:37<18:44:00, 3.52it/s] 36%|███▌ | 134160/371472 [10:39:37<19:07:00, 3.45it/s] {'loss': 3.0062, 'learning_rate': 6.752677282423667e-07, 'epoch': 5.78} + 36%|███▌ | 134160/371472 [10:39:37<19:07:00, 3.45it/s] 36%|███▌ | 134161/371472 [10:39:38<18:30:26, 3.56it/s] 36%|███▌ | 134162/371472 [10:39:38<18:27:56, 3.57it/s] 36%|███▌ | 134163/371472 [10:39:38<18:55:01, 3.48it/s] 36%|███▌ | 134164/371472 [10:39:39<18:34:57, 3.55it/s] 36%|███▌ | 134165/371472 [10:39:39<20:16:53, 3.25it/s] 36%|███▌ | 134166/371472 [10:39:39<19:59:20, 3.30it/s] 36%|███▌ | 134167/371472 [10:39:40<19:24:52, 3.40it/s] 36%|███▌ | 134168/371472 [10:39:40<18:18:53, 3.60it/s] 36%|███▌ | 134169/371472 [10:39:40<18:09:12, 3.63it/s] 36%|███▌ | 134170/371472 [10:39:40<18:21:08, 3.59it/s] 36%|███▌ | 134171/371472 [10:39:41<17:55:27, 3.68it/s] 36%|███▌ | 134172/371472 [10:39:41<17:13:23, 3.83it/s] 36%|███▌ | 134173/371472 [10:39:41<17:40:13, 3.73it/s] 36%|███▌ | 134174/371472 [10:39:41<17:32:19, 3.76it/s] 36%|███▌ | 134175/371472 [10:39:42<18:06:49, 3.64it/s] 36%|███▌ | 134176/371472 [10:39:42<17:33:23, 3.75it/s] 36%|███▌ | 134177/371472 [10:39:42<17:28:49, 3.77it/s] 36%|███▌ | 134178/371472 [10:39:42<17:14:12, 3.82it/s] 36%|███▌ | 134179/371472 [10:39:43<17:18:52, 3.81it/s] 36%|███▌ | 134180/371472 [10:39:43<19:11:27, 3.43it/s] {'loss': 3.0994, 'learning_rate': 6.752192462668879e-07, 'epoch': 5.78} + 36%|███▌ | 134180/371472 [10:39:43<19:11:27, 3.43it/s] 36%|███▌ | 134181/371472 [10:39:43<18:38:36, 3.54it/s] 36%|███▌ | 134182/371472 [10:39:44<19:24:37, 3.40it/s] 36%|███▌ | 134183/371472 [10:39:44<20:03:18, 3.29it/s] 36%|███▌ | 134184/371472 [10:39:44<19:08:59, 3.44it/s] 36%|███▌ | 134185/371472 [10:39:44<18:38:05, 3.54it/s] 36%|███▌ | 134186/371472 [10:39:45<19:07:16, 3.45it/s] 36%|███▌ | 134187/371472 [10:39:45<20:55:54, 3.15it/s] 36%|███▌ | 134188/371472 [10:39:45<19:59:04, 3.30it/s] 36%|███▌ | 134189/371472 [10:39:46<19:25:37, 3.39it/s] 36%|███▌ | 134190/371472 [10:39:46<19:11:42, 3.43it/s] 36%|███▌ | 134191/371472 [10:39:46<18:57:23, 3.48it/s] 36%|███▌ | 134192/371472 [10:39:47<18:52:50, 3.49it/s] 36%|███▌ | 134193/371472 [10:39:47<18:34:27, 3.55it/s] 36%|███▌ | 134194/371472 [10:39:47<19:15:51, 3.42it/s] 36%|███▌ | 134195/371472 [10:39:48<20:23:30, 3.23it/s] 36%|███▌ | 134196/371472 [10:39:48<19:19:47, 3.41it/s] 36%|███▌ | 134197/371472 [10:39:48<19:10:27, 3.44it/s] 36%|███▌ | 134198/371472 [10:39:48<19:17:13, 3.42it/s] 36%|███▌ | 134199/371472 [10:39:49<18:40:48, 3.53it/s] 36%|███▌ | 134200/371472 [10:39:49<19:14:02, 3.43it/s] {'loss': 3.1016, 'learning_rate': 6.75170764291409e-07, 'epoch': 5.78} + 36%|███▌ | 134200/371472 [10:39:49<19:14:02, 3.43it/s] 36%|███▌ | 134201/371472 [10:39:49<18:48:42, 3.50it/s] 36%|███▌ | 134202/371472 [10:39:50<19:31:46, 3.37it/s] 36%|███▌ | 134203/371472 [10:39:50<18:51:21, 3.50it/s] 36%|███▌ | 134204/371472 [10:39:50<18:56:58, 3.48it/s] 36%|███▌ | 134205/371472 [10:39:50<18:38:01, 3.54it/s] 36%|███▌ | 134206/371472 [10:39:51<18:40:46, 3.53it/s] 36%|███▌ | 134207/371472 [10:39:51<18:21:35, 3.59it/s] 36%|███▌ | 134208/371472 [10:39:51<17:57:21, 3.67it/s] 36%|███▌ | 134209/371472 [10:39:51<18:56:54, 3.48it/s] 36%|███▌ | 134210/371472 [10:39:52<18:19:10, 3.60it/s] 36%|███▌ | 134211/371472 [10:39:52<18:41:10, 3.53it/s] 36%|███▌ | 134212/371472 [10:39:52<18:13:39, 3.62it/s] 36%|███▌ | 134213/371472 [10:39:53<18:26:07, 3.57it/s] 36%|███▌ | 134214/371472 [10:39:53<18:34:48, 3.55it/s] 36%|███▌ | 134215/371472 [10:39:53<18:36:29, 3.54it/s] 36%|███▌ | 134216/371472 [10:39:53<18:12:35, 3.62it/s] 36%|███▌ | 134217/371472 [10:39:54<17:55:26, 3.68it/s] 36%|███▌ | 134218/371472 [10:39:54<17:59:15, 3.66it/s] 36%|███▌ | 134219/371472 [10:39:54<18:34:27, 3.55it/s] 36%|███▌ | 134220/371472 [10:39:54<17:37:02, 3.74it/s] {'loss': 3.2393, 'learning_rate': 6.751222823159301e-07, 'epoch': 5.78} + 36%|███▌ | 134220/371472 [10:39:54<17:37:02, 3.74it/s] 36%|███▌ | 134221/371472 [10:39:55<17:19:29, 3.80it/s] 36%|███▌ | 134222/371472 [10:39:55<18:23:25, 3.58it/s] 36%|███▌ | 134223/371472 [10:39:55<17:43:20, 3.72it/s] 36%|███▌ | 134224/371472 [10:39:56<18:05:24, 3.64it/s] 36%|███▌ | 134225/371472 [10:39:56<17:55:48, 3.68it/s] 36%|███▌ | 134226/371472 [10:39:56<17:48:51, 3.70it/s] 36%|███▌ | 134227/371472 [10:39:56<17:10:35, 3.84it/s] 36%|███▌ | 134228/371472 [10:39:57<17:27:09, 3.78it/s] 36%|███▌ | 134229/371472 [10:39:57<17:12:36, 3.83it/s] 36%|███▌ | 134230/371472 [10:39:57<17:03:07, 3.86it/s] 36%|███▌ | 134231/371472 [10:39:57<18:55:56, 3.48it/s] 36%|███▌ | 134232/371472 [10:39:58<17:58:33, 3.67it/s] 36%|███▌ | 134233/371472 [10:39:58<17:58:13, 3.67it/s] 36%|███▌ | 134234/371472 [10:39:58<20:47:30, 3.17it/s] 36%|███▌ | 134235/371472 [10:39:59<22:09:01, 2.98it/s] 36%|███▌ | 134236/371472 [10:39:59<21:25:45, 3.08it/s] 36%|███▌ | 134237/371472 [10:39:59<19:44:22, 3.34it/s] 36%|███▌ | 134238/371472 [10:40:00<19:04:06, 3.46it/s] 36%|███▌ | 134239/371472 [10:40:00<19:43:25, 3.34it/s] 36%|███▌ | 134240/371472 [10:40:00<21:12:56, 3.11it/s] {'loss': 3.1419, 'learning_rate': 6.750738003404511e-07, 'epoch': 5.78} + 36%|███▌ | 134240/371472 [10:40:00<21:12:56, 3.11it/s] 36%|███▌ | 134241/371472 [10:40:01<19:51:53, 3.32it/s] 36%|███▌ | 134242/371472 [10:40:01<20:43:55, 3.18it/s] 36%|███▌ | 134243/371472 [10:40:01<21:09:30, 3.11it/s] 36%|███▌ | 134244/371472 [10:40:01<19:46:55, 3.33it/s] 36%|███▌ | 134245/371472 [10:40:02<19:05:44, 3.45it/s] 36%|███▌ | 134246/371472 [10:40:02<18:54:40, 3.48it/s] 36%|███▌ | 134247/371472 [10:40:02<18:58:38, 3.47it/s] 36%|███▌ | 134248/371472 [10:40:03<18:27:41, 3.57it/s] 36%|███▌ | 134249/371472 [10:40:03<19:24:45, 3.39it/s] 36%|███▌ | 134250/371472 [10:40:03<19:14:37, 3.42it/s] 36%|███▌ | 134251/371472 [10:40:03<19:22:15, 3.40it/s] 36%|███▌ | 134252/371472 [10:40:04<21:54:50, 3.01it/s] 36%|███▌ | 134253/371472 [10:40:04<20:57:08, 3.14it/s] 36%|███▌ | 134254/371472 [10:40:04<20:02:02, 3.29it/s] 36%|███▌ | 134255/371472 [10:40:05<19:26:33, 3.39it/s] 36%|███▌ | 134256/371472 [10:40:05<19:01:12, 3.46it/s] 36%|███▌ | 134257/371472 [10:40:05<18:56:48, 3.48it/s] 36%|███▌ | 134258/371472 [10:40:06<22:47:47, 2.89it/s] 36%|███▌ | 134259/371472 [10:40:06<20:51:00, 3.16it/s] 36%|███▌ | 134260/371472 [10:40:06<21:53:10, 3.01it/s] {'loss': 2.9431, 'learning_rate': 6.750253183649722e-07, 'epoch': 5.78} + 36%|███▌ | 134260/371472 [10:40:06<21:53:10, 3.01it/s] 36%|███▌ | 134261/371472 [10:40:07<21:10:54, 3.11it/s] 36%|███▌ | 134262/371472 [10:40:07<20:20:17, 3.24it/s] 36%|███▌ | 134263/371472 [10:40:07<20:07:43, 3.27it/s] 36%|███▌ | 134264/371472 [10:40:08<18:51:53, 3.49it/s] 36%|███▌ | 134265/371472 [10:40:08<18:45:08, 3.51it/s] 36%|███▌ | 134266/371472 [10:40:08<18:02:51, 3.65it/s] 36%|███▌ | 134267/371472 [10:40:08<17:48:47, 3.70it/s] 36%|███▌ | 134268/371472 [10:40:09<17:53:26, 3.68it/s] 36%|███▌ | 134269/371472 [10:40:09<20:15:05, 3.25it/s] 36%|███▌ | 134270/371472 [10:40:09<20:45:08, 3.18it/s] 36%|███▌ | 134271/371472 [10:40:10<20:51:57, 3.16it/s] 36%|███▌ | 134272/371472 [10:40:10<20:16:22, 3.25it/s] 36%|███▌ | 134273/371472 [10:40:10<20:29:40, 3.21it/s] 36%|███▌ | 134274/371472 [10:40:10<19:26:05, 3.39it/s] 36%|███▌ | 134275/371472 [10:40:11<18:25:40, 3.58it/s] 36%|███▌ | 134276/371472 [10:40:11<18:41:28, 3.53it/s] 36%|███▌ | 134277/371472 [10:40:11<19:48:29, 3.33it/s] 36%|███▌ | 134278/371472 [10:40:12<19:09:11, 3.44it/s] 36%|███▌ | 134279/371472 [10:40:12<19:17:05, 3.42it/s] 36%|███▌ | 134280/371472 [10:40:12<18:14:20, 3.61it/s] {'loss': 3.0403, 'learning_rate': 6.749768363894934e-07, 'epoch': 5.78} + 36%|███▌ | 134280/371472 [10:40:12<18:14:20, 3.61it/s] 36%|███▌ | 134281/371472 [10:40:12<18:22:28, 3.59it/s] 36%|███▌ | 134282/371472 [10:40:13<18:13:15, 3.62it/s] 36%|███▌ | 134283/371472 [10:40:13<17:40:08, 3.73it/s] 36%|███▌ | 134284/371472 [10:40:13<18:09:43, 3.63it/s] 36%|███▌ | 134285/371472 [10:40:14<17:31:09, 3.76it/s] 36%|███▌ | 134286/371472 [10:40:14<18:08:08, 3.63it/s] 36%|███▌ | 134287/371472 [10:40:14<19:04:57, 3.45it/s] 36%|███▌ | 134288/371472 [10:40:14<18:55:21, 3.48it/s] 36%|███▌ | 134289/371472 [10:40:15<18:16:02, 3.61it/s] 36%|███▌ | 134290/371472 [10:40:15<19:08:38, 3.44it/s] 36%|███▌ | 134291/371472 [10:40:15<18:22:21, 3.59it/s] 36%|███▌ | 134292/371472 [10:40:16<18:35:18, 3.54it/s] 36%|███▌ | 134293/371472 [10:40:16<18:15:48, 3.61it/s] 36%|███▌ | 134294/371472 [10:40:16<18:38:40, 3.53it/s] 36%|███▌ | 134295/371472 [10:40:16<18:25:57, 3.57it/s] 36%|███▌ | 134296/371472 [10:40:17<19:11:02, 3.43it/s] 36%|███▌ | 134297/371472 [10:40:17<18:27:04, 3.57it/s] 36%|███▌ | 134298/371472 [10:40:17<18:24:10, 3.58it/s] 36%|███▌ | 134299/371472 [10:40:18<18:47:27, 3.51it/s] 36%|███▌ | 134300/371472 [10:40:18<18:39:32, 3.53it/s] {'loss': 3.2636, 'learning_rate': 6.749283544140145e-07, 'epoch': 5.78} + 36%|███▌ | 134300/371472 [10:40:18<18:39:32, 3.53it/s] 36%|███▌ | 134301/371472 [10:40:18<17:46:40, 3.71it/s] 36%|███▌ | 134302/371472 [10:40:18<20:13:40, 3.26it/s] 36%|███▌ | 134303/371472 [10:40:19<19:20:52, 3.41it/s] 36%|███▌ | 134304/371472 [10:40:19<18:30:53, 3.56it/s] 36%|███▌ | 134305/371472 [10:40:19<18:36:37, 3.54it/s] 36%|███▌ | 134306/371472 [10:40:19<18:01:05, 3.66it/s] 36%|███▌ | 134307/371472 [10:40:20<17:19:35, 3.80it/s] 36%|███▌ | 134308/371472 [10:40:20<17:41:20, 3.72it/s] 36%|███▌ | 134309/371472 [10:40:20<17:30:02, 3.76it/s] 36%|███▌ | 134310/371472 [10:40:20<17:10:30, 3.84it/s] 36%|███▌ | 134311/371472 [10:40:21<17:01:31, 3.87it/s] 36%|███▌ | 134312/371472 [10:40:21<18:09:25, 3.63it/s] 36%|███▌ | 134313/371472 [10:40:21<17:51:21, 3.69it/s] 36%|███▌ | 134314/371472 [10:40:22<18:42:10, 3.52it/s] 36%|███▌ | 134315/371472 [10:40:22<19:13:25, 3.43it/s] 36%|███▌ | 134316/371472 [10:40:23<25:53:11, 2.54it/s] 36%|███▌ | 134317/371472 [10:40:23<22:38:23, 2.91it/s] 36%|███▌ | 134318/371472 [10:40:23<20:24:31, 3.23it/s] 36%|███▌ | 134319/371472 [10:40:23<19:07:32, 3.44it/s] 36%|███▌ | 134320/371472 [10:40:24<18:42:46, 3.52it/s] {'loss': 3.1755, 'learning_rate': 6.748798724385356e-07, 'epoch': 5.79} + 36%|███▌ | 134320/371472 [10:40:24<18:42:46, 3.52it/s] 36%|███▌ | 134321/371472 [10:40:24<18:10:44, 3.62it/s] 36%|███▌ | 134322/371472 [10:40:24<19:10:19, 3.44it/s] 36%|███▌ | 134323/371472 [10:40:24<18:40:39, 3.53it/s] 36%|███▌ | 134324/371472 [10:40:25<19:55:28, 3.31it/s] 36%|███▌ | 134325/371472 [10:40:25<19:07:59, 3.44it/s] 36%|███▌ | 134326/371472 [10:40:25<18:33:59, 3.55it/s] 36%|███▌ | 134327/371472 [10:40:26<18:29:36, 3.56it/s] 36%|███▌ | 134328/371472 [10:40:26<18:02:38, 3.65it/s] 36%|███▌ | 134329/371472 [10:40:26<17:51:32, 3.69it/s] 36%|███▌ | 134330/371472 [10:40:26<17:28:11, 3.77it/s] 36%|███▌ | 134331/371472 [10:40:27<17:18:34, 3.81it/s] 36%|███▌ | 134332/371472 [10:40:27<19:40:05, 3.35it/s] 36%|███▌ | 134333/371472 [10:40:27<19:16:09, 3.42it/s] 36%|███▌ | 134334/371472 [10:40:27<18:21:29, 3.59it/s] 36%|███▌ | 134335/371472 [10:40:28<19:02:39, 3.46it/s] 36%|███▌ | 134336/371472 [10:40:28<19:43:57, 3.34it/s] 36%|███▌ | 134337/371472 [10:40:28<20:23:38, 3.23it/s] 36%|███▌ | 134338/371472 [10:40:29<20:00:25, 3.29it/s] 36%|███▌ | 134339/371472 [10:40:29<19:52:54, 3.31it/s] 36%|███▌ | 134340/371472 [10:40:29<19:01:38, 3.46it/s] {'loss': 2.9281, 'learning_rate': 6.748313904630567e-07, 'epoch': 5.79} + 36%|███▌ | 134340/371472 [10:40:29<19:01:38, 3.46it/s] 36%|███▌ | 134341/371472 [10:40:30<18:34:25, 3.55it/s] 36%|███▌ | 134342/371472 [10:40:30<19:14:57, 3.42it/s] 36%|███▌ | 134343/371472 [10:40:30<18:26:45, 3.57it/s] 36%|███▌ | 134344/371472 [10:40:30<17:49:31, 3.70it/s] 36%|███▌ | 134345/371472 [10:40:31<18:02:58, 3.65it/s] 36%|███▌ | 134346/371472 [10:40:31<17:42:18, 3.72it/s] 36%|███▌ | 134347/371472 [10:40:31<17:51:23, 3.69it/s] 36%|███▌ | 134348/371472 [10:40:32<18:26:15, 3.57it/s] 36%|███▌ | 134349/371472 [10:40:32<18:49:22, 3.50it/s] 36%|███▌ | 134350/371472 [10:40:32<20:39:44, 3.19it/s] 36%|███▌ | 134351/371472 [10:40:32<19:53:09, 3.31it/s] 36%|███▌ | 134352/371472 [10:40:33<21:16:13, 3.10it/s] 36%|███▌ | 134353/371472 [10:40:33<20:21:19, 3.24it/s] 36%|███▌ | 134354/371472 [10:40:33<19:05:01, 3.45it/s] 36%|███▌ | 134355/371472 [10:40:34<18:18:28, 3.60it/s] 36%|███▌ | 134356/371472 [10:40:34<18:03:39, 3.65it/s] 36%|███▌ | 134357/371472 [10:40:34<18:25:10, 3.58it/s] 36%|███▌ | 134358/371472 [10:40:34<18:56:11, 3.48it/s] 36%|███▌ | 134359/371472 [10:40:35<19:11:56, 3.43it/s] 36%|███▌ | 134360/371472 [10:40:35<18:44:46, 3.51it/s] {'loss': 3.0516, 'learning_rate': 6.747829084875778e-07, 'epoch': 5.79} + 36%|███▌ | 134360/371472 [10:40:35<18:44:46, 3.51it/s] 36%|███▌ | 134361/371472 [10:40:35<18:39:18, 3.53it/s] 36%|███▌ | 134362/371472 [10:40:36<19:00:07, 3.47it/s] 36%|███▌ | 134363/371472 [10:40:36<19:02:20, 3.46it/s] 36%|███▌ | 134364/371472 [10:40:36<18:59:48, 3.47it/s] 36%|███▌ | 134365/371472 [10:40:37<20:02:56, 3.29it/s] 36%|███▌ | 134366/371472 [10:40:37<19:49:08, 3.32it/s] 36%|███▌ | 134367/371472 [10:40:37<20:17:04, 3.25it/s] 36%|███▌ | 134368/371472 [10:40:38<21:57:59, 3.00it/s] 36%|███▌ | 134369/371472 [10:40:38<20:02:16, 3.29it/s] 36%|███▌ | 134370/371472 [10:40:38<20:43:48, 3.18it/s] 36%|███▌ | 134371/371472 [10:40:38<20:42:51, 3.18it/s] 36%|███▌ | 134372/371472 [10:40:39<20:24:54, 3.23it/s] 36%|███▌ | 134373/371472 [10:40:39<20:25:15, 3.23it/s] 36%|███▌ | 134374/371472 [10:40:39<20:55:42, 3.15it/s] 36%|███▌ | 134375/371472 [10:40:40<21:26:29, 3.07it/s] 36%|███▌ | 134376/371472 [10:40:40<22:13:56, 2.96it/s] 36%|███▌ | 134377/371472 [10:40:40<20:13:48, 3.26it/s] 36%|███▌ | 134378/371472 [10:40:41<19:15:01, 3.42it/s] 36%|███▌ | 134379/371472 [10:40:41<19:08:40, 3.44it/s] 36%|███▌ | 134380/371472 [10:40:41<20:57:03, 3.14it/s] {'loss': 3.0372, 'learning_rate': 6.747344265120989e-07, 'epoch': 5.79} + 36%|███▌ | 134380/371472 [10:40:41<20:57:03, 3.14it/s] 36%|███▌ | 134381/371472 [10:40:42<19:45:33, 3.33it/s] 36%|███▌ | 134382/371472 [10:40:42<19:36:46, 3.36it/s] 36%|███▌ | 134383/371472 [10:40:42<19:02:17, 3.46it/s] 36%|███▌ | 134384/371472 [10:40:42<18:12:16, 3.62it/s] 36%|███▌ | 134385/371472 [10:40:43<17:24:19, 3.78it/s] 36%|███▌ | 134386/371472 [10:40:43<19:00:33, 3.46it/s] 36%|███▌ | 134387/371472 [10:40:43<18:45:43, 3.51it/s] 36%|███▌ | 134388/371472 [10:40:43<18:51:54, 3.49it/s] 36%|███▌ | 134389/371472 [10:40:44<19:41:08, 3.35it/s] 36%|███▌ | 134390/371472 [10:40:44<18:59:35, 3.47it/s] 36%|███▌ | 134391/371472 [10:40:44<18:08:09, 3.63it/s] 36%|███▌ | 134392/371472 [10:40:45<17:39:23, 3.73it/s] 36%|███▌ | 134393/371472 [10:40:45<17:23:21, 3.79it/s] 36%|███▌ | 134394/371472 [10:40:45<17:10:45, 3.83it/s] 36%|███▌ | 134395/371472 [10:40:45<17:12:52, 3.83it/s] 36%|███▌ | 134396/371472 [10:40:46<17:16:34, 3.81it/s] 36%|███▌ | 134397/371472 [10:40:46<17:09:09, 3.84it/s] 36%|███▌ | 134398/371472 [10:40:46<16:54:27, 3.89it/s] 36%|███▌ | 134399/371472 [10:40:46<16:55:20, 3.89it/s] 36%|███▌ | 134400/371472 [10:40:47<16:45:05, 3.93it/s] {'loss': 3.1118, 'learning_rate': 6.7468594453662e-07, 'epoch': 5.79} + 36%|███▌ | 134400/371472 [10:40:47<16:45:05, 3.93it/s] 36%|███▌ | 134401/371472 [10:40:47<16:34:18, 3.97it/s] 36%|███▌ | 134402/371472 [10:40:47<18:20:23, 3.59it/s] 36%|███▌ | 134403/371472 [10:40:47<18:06:56, 3.64it/s] 36%|███▌ | 134404/371472 [10:40:48<19:27:12, 3.39it/s] 36%|███▌ | 134405/371472 [10:40:48<19:27:57, 3.38it/s] 36%|███▌ | 134406/371472 [10:40:48<20:15:01, 3.25it/s] 36%|███▌ | 134407/371472 [10:40:49<19:03:43, 3.45it/s] 36%|███▌ | 134408/371472 [10:40:49<18:38:47, 3.53it/s] 36%|███▌ | 134409/371472 [10:40:49<17:35:14, 3.74it/s] 36%|███▌ | 134410/371472 [10:40:49<18:39:41, 3.53it/s] 36%|███▌ | 134411/371472 [10:40:50<19:22:56, 3.40it/s] 36%|███▌ | 134412/371472 [10:40:50<18:59:48, 3.47it/s] 36%|███▌ | 134413/371472 [10:40:50<18:06:40, 3.64it/s] 36%|███▌ | 134414/371472 [10:40:51<17:49:25, 3.69it/s] 36%|███▌ | 134415/371472 [10:40:51<17:58:43, 3.66it/s] 36%|███▌ | 134416/371472 [10:40:51<19:43:46, 3.34it/s] 36%|███▌ | 134417/371472 [10:40:51<18:45:55, 3.51it/s] 36%|███▌ | 134418/371472 [10:40:52<19:01:22, 3.46it/s] 36%|███▌ | 134419/371472 [10:40:52<18:52:50, 3.49it/s] 36%|███▌ | 134420/371472 [10:40:52<18:24:30, 3.58it/s] {'loss': 3.2026, 'learning_rate': 6.746374625611411e-07, 'epoch': 5.79} + 36%|███▌ | 134420/371472 [10:40:52<18:24:30, 3.58it/s] 36%|███▌ | 134421/371472 [10:40:53<19:44:53, 3.33it/s] 36%|███▌ | 134422/371472 [10:40:53<18:42:27, 3.52it/s] 36%|███▌ | 134423/371472 [10:40:53<17:52:48, 3.68it/s] 36%|███▌ | 134424/371472 [10:40:53<18:18:10, 3.60it/s] 36%|███▌ | 134425/371472 [10:40:54<17:57:02, 3.67it/s] 36%|███▌ | 134426/371472 [10:40:54<18:00:28, 3.66it/s] 36%|███▌ | 134427/371472 [10:40:54<17:15:29, 3.82it/s] 36%|███▌ | 134428/371472 [10:40:54<16:59:39, 3.87it/s] 36%|███▌ | 134429/371472 [10:40:55<16:35:11, 3.97it/s] 36%|███▌ | 134430/371472 [10:40:55<16:20:41, 4.03it/s] 36%|███▌ | 134431/371472 [10:40:55<16:54:13, 3.90it/s] 36%|███▌ | 134432/371472 [10:40:56<17:21:03, 3.79it/s] 36%|███▌ | 134433/371472 [10:40:56<17:50:46, 3.69it/s] 36%|███▌ | 134434/371472 [10:40:56<18:08:44, 3.63it/s] 36%|███▌ | 134435/371472 [10:40:56<18:34:43, 3.54it/s] 36%|███▌ | 134436/371472 [10:40:57<18:39:32, 3.53it/s] 36%|███▌ | 134437/371472 [10:40:57<18:22:59, 3.58it/s] 36%|███▌ | 134438/371472 [10:40:57<18:45:02, 3.51it/s] 36%|███▌ | 134439/371472 [10:40:58<18:57:43, 3.47it/s] 36%|███▌ | 134440/371472 [10:40:58<19:52:21, 3.31it/s] {'loss': 3.0647, 'learning_rate': 6.745889805856623e-07, 'epoch': 5.79} + 36%|███▌ | 134440/371472 [10:40:58<19:52:21, 3.31it/s] 36%|███▌ | 134441/371472 [10:40:58<19:51:38, 3.32it/s] 36%|███▌ | 134442/371472 [10:40:58<19:22:26, 3.40it/s] 36%|███▌ | 134443/371472 [10:40:59<19:06:39, 3.45it/s] 36%|███▌ | 134444/371472 [10:40:59<18:51:01, 3.49it/s] 36%|███▌ | 134445/371472 [10:40:59<18:30:57, 3.56it/s] 36%|███▌ | 134446/371472 [10:41:00<18:00:24, 3.66it/s] 36%|███▌ | 134447/371472 [10:41:00<18:00:00, 3.66it/s] 36%|███▌ | 134448/371472 [10:41:00<17:31:03, 3.76it/s] 36%|███▌ | 134449/371472 [10:41:00<17:34:30, 3.75it/s] 36%|███▌ | 134450/371472 [10:41:01<18:45:01, 3.51it/s] 36%|███▌ | 134451/371472 [10:41:01<18:59:42, 3.47it/s] 36%|███▌ | 134452/371472 [10:41:01<18:31:39, 3.55it/s] 36%|███▌ | 134453/371472 [10:41:02<18:57:43, 3.47it/s] 36%|███▌ | 134454/371472 [10:41:02<18:21:21, 3.59it/s] 36%|███▌ | 134455/371472 [10:41:02<18:16:49, 3.60it/s] 36%|███▌ | 134456/371472 [10:41:02<18:29:17, 3.56it/s] 36%|███▌ | 134457/371472 [10:41:03<18:34:02, 3.55it/s] 36%|███▌ | 134458/371472 [10:41:03<17:48:30, 3.70it/s] 36%|███▌ | 134459/371472 [10:41:03<17:35:53, 3.74it/s] 36%|███▌ | 134460/371472 [10:41:03<17:07:22, 3.84it/s] {'loss': 3.3857, 'learning_rate': 6.745404986101834e-07, 'epoch': 5.79} + 36%|███▌ | 134460/371472 [10:41:03<17:07:22, 3.84it/s] 36%|███▌ | 134461/371472 [10:41:04<18:33:57, 3.55it/s] 36%|███▌ | 134462/371472 [10:41:04<17:58:28, 3.66it/s] 36%|███▌ | 134463/371472 [10:41:04<18:21:32, 3.59it/s] 36%|███▌ | 134464/371472 [10:41:05<18:03:10, 3.65it/s] 36%|███▌ | 134465/371472 [10:41:05<18:17:22, 3.60it/s] 36%|███▌ | 134466/371472 [10:41:05<18:04:59, 3.64it/s] 36%|███▌ | 134467/371472 [10:41:05<17:48:56, 3.70it/s] 36%|███▌ | 134468/371472 [10:41:06<17:44:03, 3.71it/s] 36%|███▌ | 134469/371472 [10:41:06<18:24:37, 3.58it/s] 36%|███▌ | 134470/371472 [10:41:06<17:55:40, 3.67it/s] 36%|███▌ | 134471/371472 [10:41:06<17:29:09, 3.76it/s] 36%|███▌ | 134472/371472 [10:41:07<17:48:00, 3.70it/s] 36%|███▌ | 134473/371472 [10:41:07<17:56:33, 3.67it/s] 36%|███▌ | 134474/371472 [10:41:07<17:54:25, 3.68it/s] 36%|███▌ | 134475/371472 [10:41:08<18:34:49, 3.54it/s] 36%|███▌ | 134476/371472 [10:41:08<17:35:35, 3.74it/s] 36%|███▌ | 134477/371472 [10:41:08<18:02:00, 3.65it/s] 36%|███▌ | 134478/371472 [10:41:08<18:29:08, 3.56it/s] 36%|███▌ | 134479/371472 [10:41:09<18:00:52, 3.65it/s] 36%|███▌ | 134480/371472 [10:41:09<19:17:57, 3.41it/s] {'loss': 3.2017, 'learning_rate': 6.744920166347045e-07, 'epoch': 5.79} + 36%|███▌ | 134480/371472 [10:41:09<19:17:57, 3.41it/s] 36%|███▌ | 134481/371472 [10:41:09<18:25:05, 3.57it/s] 36%|███▌ | 134482/371472 [10:41:09<17:56:04, 3.67it/s] 36%|███▌ | 134483/371472 [10:41:10<18:49:15, 3.50it/s] 36%|███▌ | 134484/371472 [10:41:10<17:59:53, 3.66it/s] 36%|███▌ | 134485/371472 [10:41:10<18:47:40, 3.50it/s] 36%|███▌ | 134486/371472 [10:41:11<18:44:48, 3.51it/s] 36%|███▌ | 134487/371472 [10:41:11<19:40:46, 3.35it/s] 36%|███▌ | 134488/371472 [10:41:11<18:30:53, 3.56it/s] 36%|███▌ | 134489/371472 [10:41:11<19:19:17, 3.41it/s] 36%|███▌ | 134490/371472 [10:41:12<18:36:33, 3.54it/s] 36%|███▌ | 134491/371472 [10:41:12<20:36:11, 3.20it/s] 36%|███▌ | 134492/371472 [10:41:12<20:01:37, 3.29it/s] 36%|███▌ | 134493/371472 [10:41:13<19:13:58, 3.42it/s] 36%|███▌ | 134494/371472 [10:41:13<18:19:57, 3.59it/s] 36%|███▌ | 134495/371472 [10:41:13<18:18:25, 3.60it/s] 36%|███▌ | 134496/371472 [10:41:13<17:54:37, 3.68it/s] 36%|███▌ | 134497/371472 [10:41:14<19:14:21, 3.42it/s] 36%|███▌ | 134498/371472 [10:41:14<18:53:22, 3.48it/s] 36%|███▌ | 134499/371472 [10:41:14<17:58:27, 3.66it/s] 36%|███▌ | 134500/371472 [10:41:15<17:51:31, 3.69it/s] {'loss': 3.0898, 'learning_rate': 6.744435346592255e-07, 'epoch': 5.79} + 36%|███▌ | 134500/371472 [10:41:15<17:51:31, 3.69it/s] 36%|███▌ | 134501/371472 [10:41:15<17:49:35, 3.69it/s] 36%|███▌ | 134502/371472 [10:41:15<17:34:24, 3.75it/s] 36%|███▌ | 134503/371472 [10:41:15<17:13:18, 3.82it/s] 36%|███▌ | 134504/371472 [10:41:16<17:10:06, 3.83it/s] 36%|███▌ | 134505/371472 [10:41:16<21:24:53, 3.07it/s] 36%|███▌ | 134506/371472 [10:41:16<20:24:57, 3.22it/s] 36%|███▌ | 134507/371472 [10:41:17<20:10:08, 3.26it/s] 36%|███▌ | 134508/371472 [10:41:17<18:57:02, 3.47it/s] 36%|███▌ | 134509/371472 [10:41:17<19:19:19, 3.41it/s] 36%|███▌ | 134510/371472 [10:41:17<18:43:34, 3.52it/s] 36%|███▌ | 134511/371472 [10:41:18<18:52:53, 3.49it/s] 36%|███▌ | 134512/371472 [10:41:18<19:48:29, 3.32it/s] 36%|███▌ | 134513/371472 [10:41:18<18:50:58, 3.49it/s] 36%|███▌ | 134514/371472 [10:41:19<18:23:27, 3.58it/s] 36%|███▌ | 134515/371472 [10:41:19<18:07:09, 3.63it/s] 36%|███▌ | 134516/371472 [10:41:19<17:49:17, 3.69it/s] 36%|███▌ | 134517/371472 [10:41:19<17:21:09, 3.79it/s] 36%|███▌ | 134518/371472 [10:41:20<17:06:49, 3.85it/s] 36%|███▌ | 134519/371472 [10:41:20<17:50:22, 3.69it/s] 36%|███▌ | 134520/371472 [10:41:20<18:56:12, 3.48it/s] {'loss': 3.1422, 'learning_rate': 6.743950526837467e-07, 'epoch': 5.79} + 36%|███▌ | 134520/371472 [10:41:20<18:56:12, 3.48it/s] 36%|███▌ | 134521/371472 [10:41:21<19:25:23, 3.39it/s] 36%|███▌ | 134522/371472 [10:41:21<18:28:29, 3.56it/s] 36%|███▌ | 134523/371472 [10:41:21<18:33:41, 3.55it/s] 36%|███▌ | 134524/371472 [10:41:21<18:01:16, 3.65it/s] 36%|███▌ | 134525/371472 [10:41:22<17:32:43, 3.75it/s] 36%|███▌ | 134526/371472 [10:41:22<17:39:10, 3.73it/s] 36%|███▌ | 134527/371472 [10:41:22<18:36:26, 3.54it/s] 36%|███▌ | 134528/371472 [10:41:22<17:59:57, 3.66it/s] 36%|███▌ | 134529/371472 [10:41:23<18:25:52, 3.57it/s] 36%|███▌ | 134530/371472 [10:41:23<24:31:33, 2.68it/s] 36%|███▌ | 134531/371472 [10:41:24<22:37:16, 2.91it/s] 36%|███▌ | 134532/371472 [10:41:24<21:37:31, 3.04it/s] 36%|███▌ | 134533/371472 [10:41:24<20:21:37, 3.23it/s] 36%|███▌ | 134534/371472 [10:41:24<19:07:57, 3.44it/s] 36%|███▌ | 134535/371472 [10:41:25<19:57:29, 3.30it/s] 36%|███▌ | 134536/371472 [10:41:25<20:25:16, 3.22it/s] 36%|███▌ | 134537/371472 [10:41:25<18:58:56, 3.47it/s] 36%|███▌ | 134538/371472 [10:41:26<18:27:49, 3.56it/s] 36%|███▌ | 134539/371472 [10:41:26<18:56:04, 3.48it/s] 36%|███▌ | 134540/371472 [10:41:26<18:37:36, 3.53it/s] {'loss': 3.183, 'learning_rate': 6.743465707082678e-07, 'epoch': 5.79} + 36%|███▌ | 134540/371472 [10:41:26<18:37:36, 3.53it/s] 36%|███▌ | 134541/371472 [10:41:26<18:11:06, 3.62it/s] 36%|███▌ | 134542/371472 [10:41:27<17:40:06, 3.72it/s] 36%|███▌ | 134543/371472 [10:41:27<18:19:04, 3.59it/s] 36%|███▌ | 134544/371472 [10:41:27<19:13:34, 3.42it/s] 36%|███▌ | 134545/371472 [10:41:28<18:58:31, 3.47it/s] 36%|███▌ | 134546/371472 [10:41:28<18:19:24, 3.59it/s] 36%|███▌ | 134547/371472 [10:41:28<18:00:56, 3.65it/s] 36%|███▌ | 134548/371472 [10:41:28<17:47:22, 3.70it/s] 36%|███▌ | 134549/371472 [10:41:29<17:56:49, 3.67it/s] 36%|███▌ | 134550/371472 [10:41:29<17:32:26, 3.75it/s] 36%|███▌ | 134551/371472 [10:41:29<17:44:42, 3.71it/s] 36%|███▌ | 134552/371472 [10:41:29<17:51:24, 3.69it/s] 36%|███▌ | 134553/371472 [10:41:30<17:27:56, 3.77it/s] 36%|███▌ | 134554/371472 [10:41:30<16:59:45, 3.87it/s] 36%|███▌ | 134555/371472 [10:41:30<17:55:06, 3.67it/s] 36%|███▌ | 134556/371472 [10:41:31<18:21:11, 3.59it/s] 36%|███▌ | 134557/371472 [10:41:31<18:42:52, 3.52it/s] 36%|███▌ | 134558/371472 [10:41:31<18:03:54, 3.64it/s] 36%|███▌ | 134559/371472 [10:41:31<18:33:16, 3.55it/s] 36%|███▌ | 134560/371472 [10:41:32<18:36:27, 3.54it/s] {'loss': 3.2282, 'learning_rate': 6.742980887327888e-07, 'epoch': 5.8} + 36%|███▌ | 134560/371472 [10:41:32<18:36:27, 3.54it/s] 36%|███▌ | 134561/371472 [10:41:32<18:51:54, 3.49it/s] 36%|███▌ | 134562/371472 [10:41:32<17:39:46, 3.73it/s] 36%|███▌ | 134563/371472 [10:41:32<17:25:50, 3.78it/s] 36%|███▌ | 134564/371472 [10:41:33<19:41:27, 3.34it/s] 36%|███▌ | 134565/371472 [10:41:33<20:56:09, 3.14it/s] 36%|███▌ | 134566/371472 [10:41:33<19:15:01, 3.42it/s] 36%|███▌ | 134567/371472 [10:41:34<19:43:21, 3.34it/s] 36%|███▌ | 134568/371472 [10:41:34<18:43:26, 3.51it/s] 36%|███▌ | 134569/371472 [10:41:34<17:42:46, 3.72it/s] 36%|███▌ | 134570/371472 [10:41:35<18:07:06, 3.63it/s] 36%|███▌ | 134571/371472 [10:41:35<17:30:08, 3.76it/s] 36%|███▌ | 134572/371472 [10:41:35<17:27:43, 3.77it/s] 36%|███▌ | 134573/371472 [10:41:35<17:43:54, 3.71it/s] 36%|███▌ | 134574/371472 [10:41:36<18:50:28, 3.49it/s] 36%|███▌ | 134575/371472 [10:41:36<17:58:20, 3.66it/s] 36%|███▌ | 134576/371472 [10:41:36<17:43:45, 3.71it/s] 36%|███▌ | 134577/371472 [10:41:36<17:21:25, 3.79it/s] 36%|███▌ | 134578/371472 [10:41:37<18:12:12, 3.61it/s] 36%|███▌ | 134579/371472 [10:41:37<18:37:56, 3.53it/s] 36%|███▌ | 134580/371472 [10:41:37<18:57:27, 3.47it/s] {'loss': 3.1018, 'learning_rate': 6.7424960675731e-07, 'epoch': 5.8} + 36%|███▌ | 134580/371472 [10:41:37<18:57:27, 3.47it/s] 36%|███▌ | 134581/371472 [10:41:38<18:25:40, 3.57it/s] 36%|███▌ | 134582/371472 [10:41:38<18:36:12, 3.54it/s] 36%|███▌ | 134583/371472 [10:41:38<18:52:36, 3.49it/s] 36%|███▌ | 134584/371472 [10:41:38<19:27:22, 3.38it/s] 36%|███▌ | 134585/371472 [10:41:39<19:35:34, 3.36it/s] 36%|███▌ | 134586/371472 [10:41:39<19:00:45, 3.46it/s] 36%|███▌ | 134587/371472 [10:41:39<17:43:28, 3.71it/s] 36%|███▌ | 134588/371472 [10:41:40<18:25:21, 3.57it/s] 36%|███▌ | 134589/371472 [10:41:40<18:20:56, 3.59it/s] 36%|███▌ | 134590/371472 [10:41:40<18:08:21, 3.63it/s] 36%|███▌ | 134591/371472 [10:41:40<17:23:18, 3.78it/s] 36%|███▌ | 134592/371472 [10:41:41<17:10:22, 3.83it/s] 36%|███▌ | 134593/371472 [10:41:41<18:36:01, 3.54it/s] 36%|███▌ | 134594/371472 [10:41:41<19:00:09, 3.46it/s] 36%|███▌ | 134595/371472 [10:41:42<19:25:09, 3.39it/s] 36%|███▌ | 134596/371472 [10:41:42<19:03:18, 3.45it/s] 36%|███▌ | 134597/371472 [10:41:42<18:47:10, 3.50it/s] 36%|███▌ | 134598/371472 [10:41:42<19:25:09, 3.39it/s] 36%|███▌ | 134599/371472 [10:41:43<18:43:51, 3.51it/s] 36%|███▌ | 134600/371472 [10:41:43<19:54:53, 3.30it/s] {'loss': 2.9996, 'learning_rate': 6.742011247818312e-07, 'epoch': 5.8} + 36%|███▌ | 134600/371472 [10:41:43<19:54:53, 3.30it/s] 36%|███▌ | 134601/371472 [10:41:43<19:05:58, 3.44it/s] 36%|███▌ | 134602/371472 [10:41:44<18:39:16, 3.53it/s] 36%|███▌ | 134603/371472 [10:41:44<19:17:06, 3.41it/s] 36%|███▌ | 134604/371472 [10:41:44<18:42:47, 3.52it/s] 36%|███▌ | 134605/371472 [10:41:44<18:40:09, 3.52it/s] 36%|███▌ | 134606/371472 [10:41:45<18:30:54, 3.55it/s] 36%|███▌ | 134607/371472 [10:41:45<19:35:50, 3.36it/s] 36%|███▌ | 134608/371472 [10:41:45<19:15:43, 3.42it/s] 36%|███▌ | 134609/371472 [10:41:46<19:21:51, 3.40it/s] 36%|███▌ | 134610/371472 [10:41:46<19:39:11, 3.35it/s] 36%|███▌ | 134611/371472 [10:41:46<19:11:14, 3.43it/s] 36%|███▌ | 134612/371472 [10:41:46<17:53:36, 3.68it/s] 36%|███▌ | 134613/371472 [10:41:47<17:58:44, 3.66it/s] 36%|███▌ | 134614/371472 [10:41:47<17:26:37, 3.77it/s] 36%|███▌ | 134615/371472 [10:41:47<17:20:54, 3.79it/s] 36%|███▌ | 134616/371472 [10:41:47<17:28:54, 3.76it/s] 36%|███▌ | 134617/371472 [10:41:48<18:20:17, 3.59it/s] 36%|███▌ | 134618/371472 [10:41:48<18:28:30, 3.56it/s] 36%|███▌ | 134619/371472 [10:41:48<18:30:25, 3.56it/s] 36%|███▌ | 134620/371472 [10:41:49<18:18:56, 3.59it/s] {'loss': 3.0524, 'learning_rate': 6.741526428063521e-07, 'epoch': 5.8} + 36%|███▌ | 134620/371472 [10:41:49<18:18:56, 3.59it/s] 36%|███▌ | 134621/371472 [10:41:49<17:27:18, 3.77it/s] 36%|███▌ | 134622/371472 [10:41:49<17:09:21, 3.83it/s] 36%|███▌ | 134623/371472 [10:41:49<17:40:24, 3.72it/s] 36%|███▌ | 134624/371472 [10:41:50<17:22:00, 3.79it/s] 36%|███▌ | 134625/371472 [10:41:50<17:05:19, 3.85it/s] 36%|███▌ | 134626/371472 [10:41:50<18:08:39, 3.63it/s] 36%|███▌ | 134627/371472 [10:41:50<18:04:01, 3.64it/s] 36%|███▌ | 134628/371472 [10:41:51<18:11:27, 3.62it/s] 36%|███▌ | 134629/371472 [10:41:51<19:32:38, 3.37it/s] 36%|███▌ | 134630/371472 [10:41:51<19:22:16, 3.40it/s] 36%|███▌ | 134631/371472 [10:41:52<18:19:38, 3.59it/s] 36%|███▌ | 134632/371472 [10:41:52<19:32:44, 3.37it/s] 36%|███▌ | 134633/371472 [10:41:52<19:31:53, 3.37it/s] 36%|███▌ | 134634/371472 [10:41:53<18:55:15, 3.48it/s] 36%|███▌ | 134635/371472 [10:41:53<18:22:40, 3.58it/s] 36%|███▌ | 134636/371472 [10:41:53<17:58:21, 3.66it/s] 36%|███▌ | 134637/371472 [10:41:53<17:46:39, 3.70it/s] 36%|███▌ | 134638/371472 [10:41:54<17:26:01, 3.77it/s] 36%|███▌ | 134639/371472 [10:41:54<18:59:08, 3.47it/s] 36%|███▌ | 134640/371472 [10:41:54<18:41:19, 3.52it/s] {'loss': 3.2086, 'learning_rate': 6.741041608308733e-07, 'epoch': 5.8} + 36%|███▌ | 134640/371472 [10:41:54<18:41:19, 3.52it/s] 36%|███▌ | 134641/371472 [10:41:54<18:10:23, 3.62it/s] 36%|███▌ | 134642/371472 [10:41:55<17:23:12, 3.78it/s] 36%|███▌ | 134643/371472 [10:41:55<16:57:02, 3.88it/s] 36%|███▌ | 134644/371472 [10:41:55<17:32:18, 3.75it/s] 36%|███▌ | 134645/371472 [10:41:55<17:52:17, 3.68it/s] 36%|███▌ | 134646/371472 [10:41:56<17:20:05, 3.79it/s] 36%|███▌ | 134647/371472 [10:41:56<17:52:26, 3.68it/s] 36%|███▌ | 134648/371472 [10:41:56<17:49:41, 3.69it/s] 36%|███▌ | 134649/371472 [10:41:57<17:38:43, 3.73it/s] 36%|███▌ | 134650/371472 [10:41:57<17:37:20, 3.73it/s] 36%|███▌ | 134651/371472 [10:41:57<18:42:45, 3.52it/s] 36%|███▌ | 134652/371472 [10:41:57<18:13:35, 3.61it/s] 36%|███▌ | 134653/371472 [10:41:58<17:46:47, 3.70it/s] 36%|███▌ | 134654/371472 [10:41:58<18:19:17, 3.59it/s] 36%|███▌ | 134655/371472 [10:41:58<20:35:53, 3.19it/s] 36%|███▌ | 134656/371472 [10:41:59<19:22:11, 3.40it/s] 36%|███▌ | 134657/371472 [10:41:59<18:44:14, 3.51it/s] 36%|███▌ | 134658/371472 [10:41:59<18:07:19, 3.63it/s] 36%|███▋ | 134659/371472 [10:41:59<17:51:06, 3.68it/s] 36%|███▋ | 134660/371472 [10:42:00<18:05:29, 3.64it/s] {'loss': 3.3612, 'learning_rate': 6.740556788553944e-07, 'epoch': 5.8} + 36%|███▋ | 134660/371472 [10:42:00<18:05:29, 3.64it/s] 36%|███▋ | 134661/371472 [10:42:00<17:59:27, 3.66it/s] 36%|███▋ | 134662/371472 [10:42:00<17:21:52, 3.79it/s] 36%|███▋ | 134663/371472 [10:42:00<17:25:32, 3.77it/s] 36%|███▋ | 134664/371472 [10:42:01<18:53:26, 3.48it/s] 36%|███▋ | 134665/371472 [10:42:01<18:52:26, 3.49it/s] 36%|███▋ | 134666/371472 [10:42:01<18:45:04, 3.51it/s] 36%|███▋ | 134667/371472 [10:42:02<19:24:09, 3.39it/s] 36%|███▋ | 134668/371472 [10:42:02<19:05:31, 3.45it/s] 36%|███▋ | 134669/371472 [10:42:02<18:50:21, 3.49it/s] 36%|███▋ | 134670/371472 [10:42:03<19:44:26, 3.33it/s] 36%|███▋ | 134671/371472 [10:42:03<19:10:24, 3.43it/s] 36%|███▋ | 134672/371472 [10:42:03<18:21:49, 3.58it/s] 36%|███▋ | 134673/371472 [10:42:03<17:54:57, 3.67it/s] 36%|███▋ | 134674/371472 [10:42:04<17:51:25, 3.68it/s] 36%|███▋ | 134675/371472 [10:42:04<18:28:46, 3.56it/s] 36%|███▋ | 134676/371472 [10:42:04<20:35:12, 3.20it/s] 36%|███▋ | 134677/371472 [10:42:05<19:25:44, 3.39it/s] 36%|███▋ | 134678/371472 [10:42:05<19:22:50, 3.39it/s] 36%|███▋ | 134679/371472 [10:42:05<18:34:38, 3.54it/s] 36%|███▋ | 134680/371472 [10:42:05<21:06:19, 3.12it/s] {'loss': 3.1381, 'learning_rate': 6.740071968799155e-07, 'epoch': 5.8} + 36%|███▋ | 134680/371472 [10:42:05<21:06:19, 3.12it/s] 36%|███▋ | 134681/371472 [10:42:06<20:47:58, 3.16it/s] 36%|███▋ | 134682/371472 [10:42:06<19:43:50, 3.33it/s] 36%|███▋ | 134683/371472 [10:42:06<19:32:38, 3.37it/s] 36%|███▋ | 134684/371472 [10:42:07<20:23:10, 3.23it/s] 36%|███▋ | 134685/371472 [10:42:07<20:55:46, 3.14it/s] 36%|███▋ | 134686/371472 [10:42:07<21:22:24, 3.08it/s] 36%|███▋ | 134687/371472 [10:42:08<20:02:03, 3.28it/s] 36%|███▋ | 134688/371472 [10:42:08<19:22:49, 3.39it/s] 36%|███▋ | 134689/371472 [10:42:08<18:57:40, 3.47it/s] 36%|███▋ | 134690/371472 [10:42:08<18:38:45, 3.53it/s] 36%|███▋ | 134691/371472 [10:42:09<18:38:26, 3.53it/s] 36%|███▋ | 134692/371472 [10:42:09<18:21:46, 3.58it/s] 36%|███▋ | 134693/371472 [10:42:09<18:27:51, 3.56it/s] 36%|███▋ | 134694/371472 [10:42:10<18:16:46, 3.60it/s] 36%|███▋ | 134695/371472 [10:42:10<17:53:53, 3.67it/s] 36%|███▋ | 134696/371472 [10:42:10<17:36:57, 3.73it/s] 36%|███▋ | 134697/371472 [10:42:10<17:08:00, 3.84it/s] 36%|███▋ | 134698/371472 [10:42:11<16:52:46, 3.90it/s] 36%|███▋ | 134699/371472 [10:42:11<16:58:33, 3.87it/s] 36%|███▋ | 134700/371472 [10:42:11<17:57:22, 3.66it/s] {'loss': 3.2056, 'learning_rate': 6.739587149044366e-07, 'epoch': 5.8} + 36%|███▋ | 134700/371472 [10:42:11<17:57:22, 3.66it/s] 36%|███▋ | 134701/371472 [10:42:11<17:25:50, 3.77it/s] 36%|███▋ | 134702/371472 [10:42:12<19:19:34, 3.40it/s] 36%|███▋ | 134703/371472 [10:42:12<21:34:15, 3.05it/s] 36%|███▋ | 134704/371472 [10:42:12<20:04:32, 3.28it/s] 36%|███▋ | 134705/371472 [10:42:13<19:19:59, 3.40it/s] 36%|███▋ | 134706/371472 [10:42:13<19:24:05, 3.39it/s] 36%|███▋ | 134707/371472 [10:42:13<19:09:58, 3.43it/s] 36%|███▋ | 134708/371472 [10:42:14<19:01:27, 3.46it/s] 36%|███▋ | 134709/371472 [10:42:14<18:21:09, 3.58it/s] 36%|███▋ | 134710/371472 [10:42:14<18:38:46, 3.53it/s] 36%|███▋ | 134711/371472 [10:42:14<19:15:29, 3.42it/s] 36%|███▋ | 134712/371472 [10:42:15<20:10:57, 3.26it/s] 36%|███▋ | 134713/371472 [10:42:15<19:48:19, 3.32it/s] 36%|███▋ | 134714/371472 [10:42:15<19:52:04, 3.31it/s] 36%|███▋ | 134715/371472 [10:42:16<20:34:33, 3.20it/s] 36%|███▋ | 134716/371472 [10:42:16<20:47:24, 3.16it/s] 36%|███▋ | 134717/371472 [10:42:16<20:51:42, 3.15it/s] 36%|███▋ | 134718/371472 [10:42:17<20:51:49, 3.15it/s] 36%|███▋ | 134719/371472 [10:42:17<19:46:23, 3.33it/s] 36%|███▋ | 134720/371472 [10:42:17<19:32:26, 3.37it/s] {'loss': 3.1089, 'learning_rate': 6.739102329289577e-07, 'epoch': 5.8} + 36%|███▋ | 134720/371472 [10:42:17<19:32:26, 3.37it/s] 36%|███▋ | 134721/371472 [10:42:17<19:39:36, 3.35it/s] 36%|███▋ | 134722/371472 [10:42:18<19:47:18, 3.32it/s] 36%|███▋ | 134723/371472 [10:42:18<19:13:04, 3.42it/s] 36%|███▋ | 134724/371472 [10:42:18<19:18:59, 3.40it/s] 36%|███▋ | 134725/371472 [10:42:19<19:46:49, 3.32it/s] 36%|███▋ | 134726/371472 [10:42:19<19:38:13, 3.35it/s] 36%|███▋ | 134727/371472 [10:42:19<18:43:16, 3.51it/s] 36%|███▋ | 134728/371472 [10:42:19<18:04:34, 3.64it/s] 36%|███▋ | 134729/371472 [10:42:20<18:33:51, 3.54it/s] 36%|███▋ | 134730/371472 [10:42:20<18:15:02, 3.60it/s] 36%|███▋ | 134731/371472 [10:42:20<17:27:10, 3.77it/s] 36%|███▋ | 134732/371472 [10:42:21<17:52:48, 3.68it/s] 36%|███▋ | 134733/371472 [10:42:21<18:34:14, 3.54it/s] 36%|███▋ | 134734/371472 [10:42:21<19:03:07, 3.45it/s] 36%|███▋ | 134735/371472 [10:42:21<18:00:39, 3.65it/s] 36%|███▋ | 134736/371472 [10:42:22<18:07:08, 3.63it/s] 36%|███▋ | 134737/371472 [10:42:22<17:49:46, 3.69it/s] 36%|███▋ | 134738/371472 [10:42:22<17:19:42, 3.79it/s] 36%|███▋ | 134739/371472 [10:42:22<16:56:13, 3.88it/s] 36%|███▋ | 134740/371472 [10:42:23<17:09:09, 3.83it/s] {'loss': 3.3494, 'learning_rate': 6.738617509534788e-07, 'epoch': 5.8} + 36%|███▋ | 134740/371472 [10:42:23<17:09:09, 3.83it/s] 36%|███▋ | 134741/371472 [10:42:23<16:54:45, 3.89it/s] 36%|███▋ | 134742/371472 [10:42:23<18:12:10, 3.61it/s] 36%|███▋ | 134743/371472 [10:42:24<18:04:23, 3.64it/s] 36%|███▋ | 134744/371472 [10:42:24<18:42:20, 3.52it/s] 36%|███▋ | 134745/371472 [10:42:24<19:05:12, 3.45it/s] 36%|███▋ | 134746/371472 [10:42:24<18:45:12, 3.51it/s] 36%|███▋ | 134747/371472 [10:42:25<17:57:00, 3.66it/s] 36%|███▋ | 134748/371472 [10:42:25<17:50:58, 3.68it/s] 36%|███▋ | 134749/371472 [10:42:25<18:49:11, 3.49it/s] 36%|███▋ | 134750/371472 [10:42:26<18:08:57, 3.62it/s] 36%|███▋ | 134751/371472 [10:42:26<17:36:47, 3.73it/s] 36%|███▋ | 134752/371472 [10:42:26<17:43:08, 3.71it/s] 36%|███▋ | 134753/371472 [10:42:26<18:00:10, 3.65it/s] 36%|███▋ | 134754/371472 [10:42:27<19:33:08, 3.36it/s] 36%|███▋ | 134755/371472 [10:42:27<19:17:20, 3.41it/s] 36%|███▋ | 134756/371472 [10:42:27<18:30:46, 3.55it/s] 36%|███▋ | 134757/371472 [10:42:28<19:33:22, 3.36it/s] 36%|███▋ | 134758/371472 [10:42:28<19:19:35, 3.40it/s] 36%|███▋ | 134759/371472 [10:42:28<18:46:12, 3.50it/s] 36%|███▋ | 134760/371472 [10:42:28<19:31:31, 3.37it/s] {'loss': 3.1653, 'learning_rate': 6.738132689779999e-07, 'epoch': 5.8} + 36%|███▋ | 134760/371472 [10:42:28<19:31:31, 3.37it/s] 36%|███▋ | 134761/371472 [10:42:29<19:11:29, 3.43it/s] 36%|███▋ | 134762/371472 [10:42:29<19:48:33, 3.32it/s] 36%|███▋ | 134763/371472 [10:42:29<19:10:02, 3.43it/s] 36%|███▋ | 134764/371472 [10:42:30<20:02:52, 3.28it/s] 36%|███▋ | 134765/371472 [10:42:30<19:04:44, 3.45it/s] 36%|███▋ | 134766/371472 [10:42:30<18:41:16, 3.52it/s] 36%|███▋ | 134767/371472 [10:42:30<18:38:06, 3.53it/s] 36%|███▋ | 134768/371472 [10:42:31<19:26:40, 3.38it/s] 36%|███▋ | 134769/371472 [10:42:31<19:07:08, 3.44it/s] 36%|███▋ | 134770/371472 [10:42:31<17:56:38, 3.66it/s] 36%|███▋ | 134771/371472 [10:42:32<18:02:52, 3.64it/s] 36%|███▋ | 134772/371472 [10:42:32<17:39:39, 3.72it/s] 36%|███▋ | 134773/371472 [10:42:32<17:41:20, 3.72it/s] 36%|███▋ | 134774/371472 [10:42:32<18:31:09, 3.55it/s] 36%|███▋ | 134775/371472 [10:42:33<17:38:44, 3.73it/s] 36%|███▋ | 134776/371472 [10:42:33<17:52:52, 3.68it/s] 36%|███▋ | 134777/371472 [10:42:33<19:42:27, 3.34it/s] 36%|███▋ | 134778/371472 [10:42:34<19:14:02, 3.42it/s] 36%|███▋ | 134779/371472 [10:42:34<18:57:54, 3.47it/s] 36%|███▋ | 134780/371472 [10:42:34<18:39:53, 3.52it/s] {'loss': 3.2408, 'learning_rate': 6.73764787002521e-07, 'epoch': 5.81} + 36%|███▋ | 134780/371472 [10:42:34<18:39:53, 3.52it/s] 36%|███▋ | 134781/371472 [10:42:34<19:41:35, 3.34it/s] 36%|███▋ | 134782/371472 [10:42:35<19:03:11, 3.45it/s] 36%|███▋ | 134783/371472 [10:42:35<19:15:25, 3.41it/s] 36%|███▋ | 134784/371472 [10:42:35<18:52:54, 3.48it/s] 36%|███▋ | 134785/371472 [10:42:36<18:16:47, 3.60it/s] 36%|███▋ | 134786/371472 [10:42:36<19:51:09, 3.31it/s] 36%|███▋ | 134787/371472 [10:42:36<19:12:55, 3.42it/s] 36%|███▋ | 134788/371472 [10:42:36<19:30:19, 3.37it/s] 36%|███▋ | 134789/371472 [10:42:37<22:02:11, 2.98it/s] 36%|███▋ | 134790/371472 [10:42:37<20:42:34, 3.17it/s] 36%|███▋ | 134791/371472 [10:42:37<20:03:21, 3.28it/s] 36%|███▋ | 134792/371472 [10:42:38<19:06:30, 3.44it/s] 36%|███▋ | 134793/371472 [10:42:38<19:10:33, 3.43it/s] 36%|███▋ | 134794/371472 [10:42:38<19:38:02, 3.35it/s] 36%|███▋ | 134795/371472 [10:42:39<18:49:34, 3.49it/s] 36%|███▋ | 134796/371472 [10:42:39<18:02:08, 3.65it/s] 36%|███▋ | 134797/371472 [10:42:39<17:27:27, 3.77it/s] 36%|███▋ | 134798/371472 [10:42:39<17:49:40, 3.69it/s] 36%|███▋ | 134799/371472 [10:42:40<17:48:32, 3.69it/s] 36%|███▋ | 134800/371472 [10:42:40<18:45:03, 3.51it/s] {'loss': 3.1996, 'learning_rate': 6.737163050270421e-07, 'epoch': 5.81} + 36%|███▋ | 134800/371472 [10:42:40<18:45:03, 3.51it/s] 36%|███▋ | 134801/371472 [10:42:40<18:47:00, 3.50it/s] 36%|███▋ | 134802/371472 [10:42:40<18:14:44, 3.60it/s] 36%|███▋ | 134803/371472 [10:42:41<18:54:16, 3.48it/s] 36%|███▋ | 134804/371472 [10:42:41<18:22:54, 3.58it/s] 36%|███▋ | 134805/371472 [10:42:41<19:12:33, 3.42it/s] 36%|███▋ | 134806/371472 [10:42:42<20:07:45, 3.27it/s] 36%|███▋ | 134807/371472 [10:42:42<19:13:51, 3.42it/s] 36%|███▋ | 134808/371472 [10:42:42<19:03:22, 3.45it/s] 36%|███▋ | 134809/371472 [10:42:43<20:01:08, 3.28it/s] 36%|███▋ | 134810/371472 [10:42:43<24:43:05, 2.66it/s] 36%|███▋ | 134811/371472 [10:42:43<24:07:59, 2.72it/s] 36%|███▋ | 134812/371472 [10:42:44<22:13:36, 2.96it/s] 36%|███▋ | 134813/371472 [10:42:44<22:02:06, 2.98it/s] 36%|███▋ | 134814/371472 [10:42:44<22:12:27, 2.96it/s] 36%|███▋ | 134815/371472 [10:42:45<22:12:40, 2.96it/s] 36%|███▋ | 134816/371472 [10:42:45<20:41:52, 3.18it/s] 36%|███▋ | 134817/371472 [10:42:45<19:30:12, 3.37it/s] 36%|███▋ | 134818/371472 [10:42:46<19:48:27, 3.32it/s] 36%|███▋ | 134819/371472 [10:42:46<19:17:54, 3.41it/s] 36%|███▋ | 134820/371472 [10:42:46<18:50:15, 3.49it/s] {'loss': 3.0941, 'learning_rate': 6.736678230515633e-07, 'epoch': 5.81} + 36%|███▋ | 134820/371472 [10:42:46<18:50:15, 3.49it/s] 36%|███▋ | 134821/371472 [10:42:46<18:28:16, 3.56it/s] 36%|███▋ | 134822/371472 [10:42:47<19:29:31, 3.37it/s] 36%|███▋ | 134823/371472 [10:42:47<19:15:40, 3.41it/s] 36%|███▋ | 134824/371472 [10:42:47<19:13:50, 3.42it/s] 36%|███▋ | 134825/371472 [10:42:48<18:34:40, 3.54it/s] 36%|███▋ | 134826/371472 [10:42:48<18:03:45, 3.64it/s] 36%|███▋ | 134827/371472 [10:42:48<18:42:44, 3.51it/s] 36%|███▋ | 134828/371472 [10:42:48<18:24:22, 3.57it/s] 36%|███▋ | 134829/371472 [10:42:49<18:25:19, 3.57it/s] 36%|███▋ | 134830/371472 [10:42:49<18:35:45, 3.53it/s] 36%|███▋ | 134831/371472 [10:42:49<17:34:36, 3.74it/s] 36%|███▋ | 134832/371472 [10:42:49<17:19:14, 3.80it/s] 36%|███▋ | 134833/371472 [10:42:50<18:12:22, 3.61it/s] 36%|███▋ | 134834/371472 [10:42:50<18:29:46, 3.55it/s] 36%|███▋ | 134835/371472 [10:42:50<19:23:20, 3.39it/s] 36%|███▋ | 134836/371472 [10:42:51<19:41:10, 3.34it/s] 36%|███▋ | 134837/371472 [10:42:51<19:05:32, 3.44it/s] 36%|███▋ | 134838/371472 [10:42:51<18:54:39, 3.48it/s] 36%|███▋ | 134839/371472 [10:42:52<19:01:59, 3.45it/s] 36%|███▋ | 134840/371472 [10:42:52<19:04:03, 3.45it/s] {'loss': 3.1541, 'learning_rate': 6.736193410760844e-07, 'epoch': 5.81} + 36%|███▋ | 134840/371472 [10:42:52<19:04:03, 3.45it/s] 36%|███▋ | 134841/371472 [10:42:52<20:10:09, 3.26it/s] 36%|███▋ | 134842/371472 [10:42:52<19:54:55, 3.30it/s] 36%|███▋ | 134843/371472 [10:42:53<19:14:52, 3.41it/s] 36%|███▋ | 134844/371472 [10:42:53<19:06:10, 3.44it/s] 36%|███▋ | 134845/371472 [10:42:53<19:22:08, 3.39it/s] 36%|███▋ | 134846/371472 [10:42:54<18:09:31, 3.62it/s] 36%|███▋ | 134847/371472 [10:42:54<18:45:16, 3.50it/s] 36%|███▋ | 134848/371472 [10:42:54<18:05:33, 3.63it/s] 36%|███▋ | 134849/371472 [10:42:55<20:18:03, 3.24it/s] 36%|███▋ | 134850/371472 [10:42:55<19:14:25, 3.42it/s] 36%|███▋ | 134851/371472 [10:42:55<19:05:00, 3.44it/s] 36%|███▋ | 134852/371472 [10:42:55<19:07:13, 3.44it/s] 36%|███▋ | 134853/371472 [10:42:56<19:18:09, 3.41it/s] 36%|███▋ | 134854/371472 [10:42:56<19:52:24, 3.31it/s] 36%|███▋ | 134855/371472 [10:42:56<19:54:26, 3.30it/s] 36%|███▋ | 134856/371472 [10:42:57<19:57:14, 3.29it/s] 36%|███▋ | 134857/371472 [10:42:57<19:20:32, 3.40it/s] 36%|███▋ | 134858/371472 [10:42:57<19:36:00, 3.35it/s] 36%|███▋ | 134859/371472 [10:42:57<19:12:01, 3.42it/s] 36%|███▋ | 134860/371472 [10:42:58<18:48:43, 3.49it/s] {'loss': 3.1663, 'learning_rate': 6.735708591006054e-07, 'epoch': 5.81} + 36%|███▋ | 134860/371472 [10:42:58<18:48:43, 3.49it/s] 36%|███▋ | 134861/371472 [10:42:58<18:59:23, 3.46it/s] 36%|███▋ | 134862/371472 [10:42:58<18:33:45, 3.54it/s] 36%|███▋ | 134863/371472 [10:42:59<18:15:28, 3.60it/s] 36%|███▋ | 134864/371472 [10:42:59<18:50:25, 3.49it/s] 36%|███▋ | 134865/371472 [10:42:59<18:15:17, 3.60it/s] 36%|███▋ | 134866/371472 [10:42:59<18:07:50, 3.63it/s] 36%|███▋ | 134867/371472 [10:43:00<17:59:09, 3.65it/s] 36%|███▋ | 134868/371472 [10:43:00<18:28:08, 3.56it/s] 36%|███▋ | 134869/371472 [10:43:00<17:42:47, 3.71it/s] 36%|███▋ | 134870/371472 [10:43:00<17:53:31, 3.67it/s] 36%|███▋ | 134871/371472 [10:43:01<17:15:22, 3.81it/s] 36%|███▋ | 134872/371472 [10:43:01<17:11:25, 3.82it/s] 36%|███▋ | 134873/371472 [10:43:01<16:43:31, 3.93it/s] 36%|███▋ | 134874/371472 [10:43:01<17:26:04, 3.77it/s] 36%|███▋ | 134875/371472 [10:43:02<17:05:39, 3.84it/s] 36%|███▋ | 134876/371472 [10:43:02<17:30:36, 3.75it/s] 36%|███▋ | 134877/371472 [10:43:02<17:41:14, 3.72it/s] 36%|███▋ | 134878/371472 [10:43:03<17:30:11, 3.75it/s] 36%|███▋ | 134879/371472 [10:43:03<18:10:00, 3.62it/s] 36%|███▋ | 134880/371472 [10:43:03<18:57:08, 3.47it/s] {'loss': 3.2249, 'learning_rate': 6.735223771251265e-07, 'epoch': 5.81} + 36%|███▋ | 134880/371472 [10:43:03<18:57:08, 3.47it/s] 36%|███▋ | 134881/371472 [10:43:03<18:50:32, 3.49it/s] 36%|███▋ | 134882/371472 [10:43:04<18:40:33, 3.52it/s] 36%|███▋ | 134883/371472 [10:43:04<17:44:59, 3.70it/s] 36%|███▋ | 134884/371472 [10:43:04<17:22:19, 3.78it/s] 36%|███▋ | 134885/371472 [10:43:04<17:20:05, 3.79it/s] 36%|███▋ | 134886/371472 [10:43:05<16:40:08, 3.94it/s] 36%|███▋ | 134887/371472 [10:43:05<16:17:41, 4.03it/s] 36%|███▋ | 134888/371472 [10:43:05<16:21:29, 4.02it/s] 36%|███▋ | 134889/371472 [10:43:06<17:35:10, 3.74it/s] 36%|███▋ | 134890/371472 [10:43:06<18:44:04, 3.51it/s] 36%|███▋ | 134891/371472 [10:43:06<18:27:35, 3.56it/s] 36%|███▋ | 134892/371472 [10:43:06<17:51:12, 3.68it/s] 36%|███▋ | 134893/371472 [10:43:07<17:38:15, 3.73it/s] 36%|███▋ | 134894/371472 [10:43:07<17:36:37, 3.73it/s] 36%|███▋ | 134895/371472 [10:43:07<17:16:46, 3.80it/s] 36%|███▋ | 134896/371472 [10:43:07<18:53:44, 3.48it/s] 36%|███▋ | 134897/371472 [10:43:08<18:17:39, 3.59it/s] 36%|███▋ | 134898/371472 [10:43:08<17:46:48, 3.70it/s] 36%|███▋ | 134899/371472 [10:43:08<17:53:44, 3.67it/s] 36%|███▋ | 134900/371472 [10:43:09<19:27:47, 3.38it/s] {'loss': 3.0091, 'learning_rate': 6.734738951496477e-07, 'epoch': 5.81} + 36%|███▋ | 134900/371472 [10:43:09<19:27:47, 3.38it/s] 36%|███▋ | 134901/371472 [10:43:09<18:45:47, 3.50it/s] 36%|███▋ | 134902/371472 [10:43:09<18:26:57, 3.56it/s] 36%|███▋ | 134903/371472 [10:43:10<22:13:02, 2.96it/s] 36%|███▋ | 134904/371472 [10:43:10<20:53:54, 3.14it/s] 36%|███▋ | 134905/371472 [10:43:10<21:00:36, 3.13it/s] 36%|███▋ | 134906/371472 [10:43:10<19:28:19, 3.37it/s] 36%|███▋ | 134907/371472 [10:43:11<19:09:52, 3.43it/s] 36%|███▋ | 134908/371472 [10:43:11<18:04:53, 3.63it/s] 36%|███▋ | 134909/371472 [10:43:11<18:36:37, 3.53it/s] 36%|███▋ | 134910/371472 [10:43:12<18:33:12, 3.54it/s] 36%|███▋ | 134911/371472 [10:43:12<18:28:03, 3.56it/s] 36%|███▋ | 134912/371472 [10:43:12<18:08:35, 3.62it/s] 36%|███▋ | 134913/371472 [10:43:12<17:42:07, 3.71it/s] 36%|███▋ | 134914/371472 [10:43:13<17:32:30, 3.75it/s] 36%|███▋ | 134915/371472 [10:43:13<17:08:07, 3.83it/s] 36%|███▋ | 134916/371472 [10:43:13<17:27:14, 3.76it/s] 36%|███▋ | 134917/371472 [10:43:13<18:31:46, 3.55it/s] 36%|███▋ | 134918/371472 [10:43:14<18:08:16, 3.62it/s] 36%|███▋ | 134919/371472 [10:43:14<17:45:28, 3.70it/s] 36%|███▋ | 134920/371472 [10:43:14<17:49:39, 3.69it/s] {'loss': 3.1883, 'learning_rate': 6.734254131741688e-07, 'epoch': 5.81} + 36%|███▋ | 134920/371472 [10:43:14<17:49:39, 3.69it/s] 36%|███▋ | 134921/371472 [10:43:15<17:57:56, 3.66it/s] 36%|███▋ | 134922/371472 [10:43:15<18:09:43, 3.62it/s] 36%|███▋ | 134923/371472 [10:43:15<17:21:25, 3.79it/s] 36%|███▋ | 134924/371472 [10:43:15<17:36:23, 3.73it/s] 36%|███▋ | 134925/371472 [10:43:16<17:00:52, 3.86it/s] 36%|███▋ | 134926/371472 [10:43:16<17:19:19, 3.79it/s] 36%|███▋ | 134927/371472 [10:43:16<16:58:40, 3.87it/s] 36%|███▋ | 134928/371472 [10:43:16<16:59:14, 3.87it/s] 36%|███▋ | 134929/371472 [10:43:17<17:10:37, 3.83it/s] 36%|███▋ | 134930/371472 [10:43:17<16:54:30, 3.89it/s] 36%|███▋ | 134931/371472 [10:43:17<17:04:48, 3.85it/s] 36%|███▋ | 134932/371472 [10:43:17<16:58:58, 3.87it/s] 36%|███▋ | 134933/371472 [10:43:18<18:22:22, 3.58it/s] 36%|███▋ | 134934/371472 [10:43:18<19:21:02, 3.40it/s] 36%|███▋ | 134935/371472 [10:43:18<19:45:42, 3.32it/s] 36%|███▋ | 134936/371472 [10:43:19<20:14:48, 3.25it/s] 36%|███▋ | 134937/371472 [10:43:19<21:48:44, 3.01it/s] 36%|███▋ | 134938/371472 [10:43:19<20:43:34, 3.17it/s] 36%|███▋ | 134939/371472 [10:43:20<19:38:43, 3.34it/s] 36%|███▋ | 134940/371472 [10:43:20<18:44:53, 3.50it/s] {'loss': 3.0712, 'learning_rate': 6.733769311986898e-07, 'epoch': 5.81} + 36%|███▋ | 134940/371472 [10:43:20<18:44:53, 3.50it/s] 36%|███▋ | 134941/371472 [10:43:20<18:31:36, 3.55it/s] 36%|███▋ | 134942/371472 [10:43:20<18:22:59, 3.57it/s] 36%|███▋ | 134943/371472 [10:43:21<17:20:44, 3.79it/s] 36%|███▋ | 134944/371472 [10:43:21<17:31:27, 3.75it/s] 36%|███▋ | 134945/371472 [10:43:21<19:04:25, 3.44it/s] 36%|███▋ | 134946/371472 [10:43:22<19:26:12, 3.38it/s] 36%|███▋ | 134947/371472 [10:43:22<19:14:49, 3.41it/s] 36%|███▋ | 134948/371472 [10:43:22<18:35:59, 3.53it/s] 36%|███▋ | 134949/371472 [10:43:22<18:37:15, 3.53it/s] 36%|███▋ | 134950/371472 [10:43:23<17:51:28, 3.68it/s] 36%|███▋ | 134951/371472 [10:43:23<18:05:46, 3.63it/s] 36%|███▋ | 134952/371472 [10:43:23<19:15:43, 3.41it/s] 36%|███▋ | 134953/371472 [10:43:24<18:43:43, 3.51it/s] 36%|███▋ | 134954/371472 [10:43:24<18:21:51, 3.58it/s] 36%|███▋ | 134955/371472 [10:43:24<19:58:42, 3.29it/s] 36%|███▋ | 134956/371472 [10:43:24<20:45:00, 3.17it/s] 36%|███▋ | 134957/371472 [10:43:25<20:08:40, 3.26it/s] 36%|███▋ | 134958/371472 [10:43:25<21:13:19, 3.10it/s] 36%|███▋ | 134959/371472 [10:43:25<19:40:26, 3.34it/s] 36%|███▋ | 134960/371472 [10:43:26<21:08:27, 3.11it/s] {'loss': 3.0874, 'learning_rate': 6.73328449223211e-07, 'epoch': 5.81} + 36%|███▋ | 134960/371472 [10:43:26<21:08:27, 3.11it/s] 36%|███▋ | 134961/371472 [10:43:26<20:11:27, 3.25it/s] 36%|███▋ | 134962/371472 [10:43:26<19:13:23, 3.42it/s] 36%|███▋ | 134963/371472 [10:43:27<19:09:34, 3.43it/s] 36%|███▋ | 134964/371472 [10:43:27<19:59:07, 3.29it/s] 36%|███▋ | 134965/371472 [10:43:27<20:43:46, 3.17it/s] 36%|███▋ | 134966/371472 [10:43:28<19:55:29, 3.30it/s] 36%|███▋ | 134967/371472 [10:43:28<18:57:07, 3.47it/s] 36%|███▋ | 134968/371472 [10:43:28<18:52:50, 3.48it/s] 36%|███▋ | 134969/371472 [10:43:28<18:59:25, 3.46it/s] 36%|███▋ | 134970/371472 [10:43:29<17:56:35, 3.66it/s] 36%|███▋ | 134971/371472 [10:43:29<19:52:12, 3.31it/s] 36%|███▋ | 134972/371472 [10:43:29<19:40:20, 3.34it/s] 36%|███▋ | 134973/371472 [10:43:30<19:12:53, 3.42it/s] 36%|███▋ | 134974/371472 [10:43:30<19:26:56, 3.38it/s] 36%|███▋ | 134975/371472 [10:43:30<19:27:14, 3.38it/s] 36%|███▋ | 134976/371472 [10:43:30<18:49:22, 3.49it/s] 36%|███▋ | 134977/371472 [10:43:31<18:13:48, 3.60it/s] 36%|███▋ | 134978/371472 [10:43:31<17:43:39, 3.71it/s] 36%|███▋ | 134979/371472 [10:43:31<17:39:35, 3.72it/s] 36%|███▋ | 134980/371472 [10:43:31<17:07:52, 3.83it/s] {'loss': 3.1555, 'learning_rate': 6.732799672477322e-07, 'epoch': 5.81} + 36%|███▋ | 134980/371472 [10:43:31<17:07:52, 3.83it/s] 36%|███▋ | 134981/371472 [10:43:32<17:27:11, 3.76it/s] 36%|███▋ | 134982/371472 [10:43:32<18:45:50, 3.50it/s] 36%|███▋ | 134983/371472 [10:43:32<19:51:43, 3.31it/s] 36%|███▋ | 134984/371472 [10:43:33<19:31:00, 3.37it/s] 36%|███▋ | 134985/371472 [10:43:33<19:10:05, 3.43it/s] 36%|███▋ | 134986/371472 [10:43:33<18:22:51, 3.57it/s] 36%|███▋ | 134987/371472 [10:43:33<18:53:02, 3.48it/s] 36%|███▋ | 134988/371472 [10:43:34<18:39:03, 3.52it/s] 36%|███▋ | 134989/371472 [10:43:34<18:45:26, 3.50it/s] 36%|███▋ | 134990/371472 [10:43:34<18:07:28, 3.62it/s] 36%|███▋ | 134991/371472 [10:43:35<18:00:23, 3.65it/s] 36%|███▋ | 134992/371472 [10:43:35<18:12:07, 3.61it/s] 36%|███▋ | 134993/371472 [10:43:35<18:41:00, 3.52it/s] 36%|███▋ | 134994/371472 [10:43:35<17:49:10, 3.69it/s] 36%|███▋ | 134995/371472 [10:43:36<17:42:25, 3.71it/s] 36%|███▋ | 134996/371472 [10:43:36<17:20:34, 3.79it/s] 36%|███▋ | 134997/371472 [10:43:36<16:34:33, 3.96it/s] 36%|███▋ | 134998/371472 [10:43:36<16:17:00, 4.03it/s] 36%|███▋ | 134999/371472 [10:43:37<18:00:22, 3.65it/s] 36%|███▋ | 135000/371472 [10:43:37<17:53:05, 3.67it/s] {'loss': 3.2034, 'learning_rate': 6.732314852722532e-07, 'epoch': 5.81} + 36%|███▋ | 135000/371472 [10:43:37<17:53:05, 3.67it/s] 36%|███▋ | 135001/371472 [10:43:37<17:38:47, 3.72it/s] 36%|███▋ | 135002/371472 [10:43:38<18:31:46, 3.54it/s] 36%|███▋ | 135003/371472 [10:43:38<18:02:05, 3.64it/s] 36%|███▋ | 135004/371472 [10:43:38<20:36:23, 3.19it/s] 36%|███▋ | 135005/371472 [10:43:38<19:34:46, 3.35it/s] 36%|███▋ | 135006/371472 [10:43:39<18:57:04, 3.47it/s] 36%|███▋ | 135007/371472 [10:43:39<18:29:32, 3.55it/s] 36%|███▋ | 135008/371472 [10:43:39<19:28:57, 3.37it/s] 36%|███▋ | 135009/371472 [10:43:40<18:40:35, 3.52it/s] 36%|███▋ | 135010/371472 [10:43:40<18:54:35, 3.47it/s] 36%|███▋ | 135011/371472 [10:43:40<17:57:04, 3.66it/s] 36%|███▋ | 135012/371472 [10:43:40<17:57:07, 3.66it/s] 36%|███▋ | 135013/371472 [10:43:41<18:20:51, 3.58it/s] 36%|███▋ | 135014/371472 [10:43:41<18:55:24, 3.47it/s] 36%|███▋ | 135015/371472 [10:43:41<18:08:47, 3.62it/s] 36%|███▋ | 135016/371472 [10:43:42<18:12:03, 3.61it/s] 36%|███▋ | 135017/371472 [10:43:42<17:25:14, 3.77it/s] 36%|███▋ | 135018/371472 [10:43:42<18:02:57, 3.64it/s] 36%|███▋ | 135019/371472 [10:43:42<17:50:07, 3.68it/s] 36%|███▋ | 135020/371472 [10:43:43<18:58:37, 3.46it/s] {'loss': 3.3072, 'learning_rate': 6.731830032967742e-07, 'epoch': 5.82} + 36%|███▋ | 135020/371472 [10:43:43<18:58:37, 3.46it/s] 36%|███▋ | 135021/371472 [10:43:43<18:46:16, 3.50it/s] 36%|███▋ | 135022/371472 [10:43:43<19:08:40, 3.43it/s] 36%|███▋ | 135023/371472 [10:43:43<18:16:08, 3.60it/s] 36%|██���▋ | 135024/371472 [10:43:44<18:01:26, 3.64it/s] 36%|███▋ | 135025/371472 [10:43:44<17:45:02, 3.70it/s] 36%|███▋ | 135026/371472 [10:43:44<18:25:31, 3.56it/s] 36%|███▋ | 135027/371472 [10:43:45<19:15:54, 3.41it/s] 36%|███▋ | 135028/371472 [10:43:45<19:17:37, 3.40it/s] 36%|███▋ | 135029/371472 [10:43:45<19:04:31, 3.44it/s] 36%|███▋ | 135030/371472 [10:43:46<20:04:26, 3.27it/s] 36%|███▋ | 135031/371472 [10:43:46<19:27:39, 3.37it/s] 36%|███▋ | 135032/371472 [10:43:46<18:43:39, 3.51it/s] 36%|███▋ | 135033/371472 [10:43:46<17:39:50, 3.72it/s] 36%|███▋ | 135034/371472 [10:43:47<18:00:08, 3.65it/s] 36%|███▋ | 135035/371472 [10:43:47<17:48:39, 3.69it/s] 36%|███▋ | 135036/371472 [10:43:47<18:15:22, 3.60it/s] 36%|███▋ | 135037/371472 [10:43:47<18:37:58, 3.52it/s] 36%|███▋ | 135038/371472 [10:43:48<18:05:53, 3.63it/s] 36%|███▋ | 135039/371472 [10:43:48<17:44:39, 3.70it/s] 36%|███▋ | 135040/371472 [10:43:48<17:47:38, 3.69it/s] {'loss': 3.1931, 'learning_rate': 6.731345213212954e-07, 'epoch': 5.82} + 36%|███▋ | 135040/371472 [10:43:48<17:47:38, 3.69it/s] 36%|███▋ | 135041/371472 [10:43:49<17:40:49, 3.71it/s] 36%|███▋ | 135042/371472 [10:43:49<17:19:01, 3.79it/s] 36%|███▋ | 135043/371472 [10:43:49<18:32:48, 3.54it/s] 36%|███▋ | 135044/371472 [10:43:49<17:59:39, 3.65it/s] 36%|███▋ | 135045/371472 [10:43:50<17:50:11, 3.68it/s] 36%|███▋ | 135046/371472 [10:43:50<18:18:47, 3.59it/s] 36%|███▋ | 135047/371472 [10:43:50<19:27:21, 3.38it/s] 36%|███▋ | 135048/371472 [10:43:51<18:37:14, 3.53it/s] 36%|███▋ | 135049/371472 [10:43:51<18:17:32, 3.59it/s] 36%|███▋ | 135050/371472 [10:43:51<18:17:42, 3.59it/s] 36%|███▋ | 135051/371472 [10:43:51<18:12:54, 3.61it/s] 36%|███▋ | 135052/371472 [10:43:52<17:36:18, 3.73it/s] 36%|███▋ | 135053/371472 [10:43:52<17:47:50, 3.69it/s] 36%|███▋ | 135054/371472 [10:43:52<19:49:13, 3.31it/s] 36%|███▋ | 135055/371472 [10:43:52<18:30:32, 3.55it/s] 36%|███▋ | 135056/371472 [10:43:53<17:32:19, 3.74it/s] 36%|███▋ | 135057/371472 [10:43:53<17:21:38, 3.78it/s] 36%|███▋ | 135058/371472 [10:43:53<18:20:05, 3.58it/s] 36%|███▋ | 135059/371472 [10:43:54<17:44:38, 3.70it/s] 36%|███▋ | 135060/371472 [10:43:54<17:24:08, 3.77it/s] {'loss': 3.1744, 'learning_rate': 6.730860393458166e-07, 'epoch': 5.82} + 36%|███▋ | 135060/371472 [10:43:54<17:24:08, 3.77it/s] 36%|███▋ | 135061/371472 [10:43:54<18:20:55, 3.58it/s] 36%|███▋ | 135062/371472 [10:43:54<17:51:14, 3.68it/s] 36%|███▋ | 135063/371472 [10:43:55<18:14:18, 3.60it/s] 36%|███▋ | 135064/371472 [10:43:55<18:23:27, 3.57it/s] 36%|███▋ | 135065/371472 [10:43:55<18:53:44, 3.48it/s] 36%|███▋ | 135066/371472 [10:43:55<18:23:43, 3.57it/s] 36%|███▋ | 135067/371472 [10:43:56<17:42:56, 3.71it/s] 36%|███▋ | 135068/371472 [10:43:56<18:00:48, 3.65it/s] 36%|███▋ | 135069/371472 [10:43:56<17:38:49, 3.72it/s] 36%|███▋ | 135070/371472 [10:43:57<19:24:49, 3.38it/s] 36%|███▋ | 135071/371472 [10:43:57<21:06:49, 3.11it/s] 36%|███▋ | 135072/371472 [10:43:57<19:57:05, 3.29it/s] 36%|███▋ | 135073/371472 [10:43:58<19:16:12, 3.41it/s] 36%|███▋ | 135074/371472 [10:43:58<18:54:57, 3.47it/s] 36%|███▋ | 135075/371472 [10:43:58<18:57:10, 3.46it/s] 36%|███▋ | 135076/371472 [10:43:58<19:04:32, 3.44it/s] 36%|███▋ | 135077/371472 [10:43:59<18:16:58, 3.59it/s] 36%|███▋ | 135078/371472 [10:43:59<18:25:41, 3.56it/s] 36%|███▋ | 135079/371472 [10:43:59<19:10:25, 3.42it/s] 36%|███▋ | 135080/371472 [10:44:00<18:31:46, 3.54it/s] {'loss': 3.0712, 'learning_rate': 6.730375573703377e-07, 'epoch': 5.82} + 36%|███▋ | 135080/371472 [10:44:00<18:31:46, 3.54it/s] 36%|███▋ | 135081/371472 [10:44:00<17:51:20, 3.68it/s] 36%|███▋ | 135082/371472 [10:44:00<17:33:38, 3.74it/s] 36%|███▋ | 135083/371472 [10:44:00<18:34:36, 3.53it/s] 36%|███▋ | 135084/371472 [10:44:01<17:42:11, 3.71it/s] 36%|███▋ | 135085/371472 [10:44:01<19:01:23, 3.45it/s] 36%|███▋ | 135086/371472 [10:44:01<18:49:25, 3.49it/s] 36%|███▋ | 135087/371472 [10:44:02<19:24:35, 3.38it/s] 36%|███▋ | 135088/371472 [10:44:02<18:32:21, 3.54it/s] 36%|███▋ | 135089/371472 [10:44:02<18:29:39, 3.55it/s] 36%|███▋ | 135090/371472 [10:44:02<18:16:37, 3.59it/s] 36%|███▋ | 135091/371472 [10:44:03<18:47:21, 3.49it/s] 36%|███▋ | 135092/371472 [10:44:03<19:20:40, 3.39it/s] 36%|███▋ | 135093/371472 [10:44:03<18:33:59, 3.54it/s] 36%|███▋ | 135094/371472 [10:44:03<18:45:16, 3.50it/s] 36%|███▋ | 135095/371472 [10:44:04<18:32:08, 3.54it/s] 36%|███▋ | 135096/371472 [10:44:04<17:57:35, 3.66it/s] 36%|███▋ | 135097/371472 [10:44:04<17:39:28, 3.72it/s] 36%|███▋ | 135098/371472 [10:44:05<18:00:12, 3.65it/s] 36%|███▋ | 135099/371472 [10:44:05<17:56:50, 3.66it/s] 36%|███▋ | 135100/371472 [10:44:05<17:36:44, 3.73it/s] {'loss': 3.1945, 'learning_rate': 6.729890753948587e-07, 'epoch': 5.82} + 36%|███▋ | 135100/371472 [10:44:05<17:36:44, 3.73it/s] 36%|███▋ | 135101/371472 [10:44:05<17:14:14, 3.81it/s] 36%|███▋ | 135102/371472 [10:44:06<19:20:57, 3.39it/s] 36%|███▋ | 135103/371472 [10:44:06<18:51:51, 3.48it/s] 36%|███▋ | 135104/371472 [10:44:06<20:53:42, 3.14it/s] 36%|███▋ | 135105/371472 [10:44:07<19:19:07, 3.40it/s] 36%|███▋ | 135106/371472 [10:44:07<19:00:13, 3.45it/s] 36%|███▋ | 135107/371472 [10:44:07<17:51:23, 3.68it/s] 36%|███▋ | 135108/371472 [10:44:07<17:21:09, 3.78it/s] 36%|███▋ | 135109/371472 [10:44:08<17:22:54, 3.78it/s] 36%|███▋ | 135110/371472 [10:44:08<17:19:01, 3.79it/s] 36%|███▋ | 135111/371472 [10:44:08<17:50:44, 3.68it/s] 36%|███▋ | 135112/371472 [10:44:08<17:34:09, 3.74it/s] 36%|███▋ | 135113/371472 [10:44:09<18:03:44, 3.63it/s] 36%|███▋ | 135114/371472 [10:44:09<17:38:35, 3.72it/s] 36%|███▋ | 135115/371472 [10:44:09<17:56:14, 3.66it/s] 36%|███▋ | 135116/371472 [10:44:10<17:48:07, 3.69it/s] 36%|███▋ | 135117/371472 [10:44:10<17:38:24, 3.72it/s] 36%|███▋ | 135118/371472 [10:44:10<17:01:12, 3.86it/s] 36%|███▋ | 135119/371472 [10:44:10<16:40:34, 3.94it/s] 36%|███▋ | 135120/371472 [10:44:11<17:13:18, 3.81it/s] {'loss': 2.9604, 'learning_rate': 6.729405934193799e-07, 'epoch': 5.82} + 36%|███▋ | 135120/371472 [10:44:11<17:13:18, 3.81it/s] 36%|███▋ | 135121/371472 [10:44:11<17:11:17, 3.82it/s] 36%|███▋ | 135122/371472 [10:44:11<17:07:13, 3.83it/s] 36%|███▋ | 135123/371472 [10:44:11<17:59:52, 3.65it/s] 36%|███▋ | 135124/371472 [10:44:12<18:00:06, 3.65it/s] 36%|███▋ | 135125/371472 [10:44:12<17:41:05, 3.71it/s] 36%|███▋ | 135126/371472 [10:44:12<17:34:23, 3.74it/s] 36%|███▋ | 135127/371472 [10:44:12<17:14:19, 3.81it/s] 36%|███▋ | 135128/371472 [10:44:13<18:13:26, 3.60it/s] 36%|███▋ | 135129/371472 [10:44:13<17:36:34, 3.73it/s] 36%|███▋ | 135130/371472 [10:44:13<17:23:06, 3.78it/s] 36%|███▋ | 135131/371472 [10:44:14<17:37:25, 3.73it/s] 36%|███▋ | 135132/371472 [10:44:14<17:36:15, 3.73it/s] 36%|███▋ | 135133/371472 [10:44:14<17:19:38, 3.79it/s] 36%|███▋ | 135134/371472 [10:44:14<17:15:38, 3.80it/s] 36%|███▋ | 135135/371472 [10:44:15<17:25:10, 3.77it/s] 36%|███▋ | 135136/371472 [10:44:15<16:58:17, 3.87it/s] 36%|███▋ | 135137/371472 [10:44:15<19:49:58, 3.31it/s] 36%|███▋ | 135138/371472 [10:44:16<20:13:34, 3.25it/s] 36%|███▋ | 135139/371472 [10:44:16<20:12:11, 3.25it/s] 36%|███▋ | 135140/371472 [10:44:16<19:36:21, 3.35it/s] {'loss': 3.0203, 'learning_rate': 6.72892111443901e-07, 'epoch': 5.82} + 36%|███▋ | 135140/371472 [10:44:16<19:36:21, 3.35it/s] 36%|███▋ | 135141/371472 [10:44:16<18:21:33, 3.58it/s] 36%|███▋ | 135142/371472 [10:44:17<18:45:55, 3.50it/s] 36%|███▋ | 135143/371472 [10:44:17<19:20:45, 3.39it/s] 36%|███▋ | 135144/371472 [10:44:17<18:50:11, 3.49it/s] 36%|███▋ | 135145/371472 [10:44:17<18:08:12, 3.62it/s] 36%|███▋ | 135146/371472 [10:44:18<17:37:28, 3.72it/s] 36%|███▋ | 135147/371472 [10:44:18<16:57:08, 3.87it/s] 36%|███▋ | 135148/371472 [10:44:18<18:19:08, 3.58it/s] 36%|███▋ | 135149/371472 [10:44:19<17:57:28, 3.66it/s] 36%|███▋ | 135150/371472 [10:44:19<17:35:31, 3.73it/s] 36%|███▋ | 135151/371472 [10:44:19<17:18:17, 3.79it/s] 36%|███▋ | 135152/371472 [10:44:19<17:11:28, 3.82it/s] 36%|███▋ | 135153/371472 [10:44:20<16:53:19, 3.89it/s] 36%|███▋ | 135154/371472 [10:44:20<16:39:46, 3.94it/s] 36%|███▋ | 135155/371472 [10:44:20<17:08:19, 3.83it/s] 36%|███▋ | 135156/371472 [10:44:20<18:14:45, 3.60it/s] 36%|███▋ | 135157/371472 [10:44:21<17:31:11, 3.75it/s] 36%|███▋ | 135158/371472 [10:44:21<20:12:22, 3.25it/s] 36%|███▋ | 135159/371472 [10:44:21<19:17:26, 3.40it/s] 36%|███▋ | 135160/371472 [10:44:22<18:25:17, 3.56it/s] {'loss': 3.042, 'learning_rate': 6.728436294684219e-07, 'epoch': 5.82} + 36%|███▋ | 135160/371472 [10:44:22<18:25:17, 3.56it/s] 36%|███▋ | 135161/371472 [10:44:22<19:08:25, 3.43it/s] 36%|███▋ | 135162/371472 [10:44:22<18:50:01, 3.49it/s] 36%|███▋ | 135163/371472 [10:44:22<19:35:55, 3.35it/s] 36%|███▋ | 135164/371472 [10:44:23<19:46:02, 3.32it/s] 36%|███▋ | 135165/371472 [10:44:23<18:51:20, 3.48it/s] 36%|███▋ | 135166/371472 [10:44:23<18:22:42, 3.57it/s] 36%|███▋ | 135167/371472 [10:44:24<18:05:13, 3.63it/s] 36%|███▋ | 135168/371472 [10:44:24<17:50:49, 3.68it/s] 36%|███▋ | 135169/371472 [10:44:24<17:40:19, 3.71it/s] 36%|███▋ | 135170/371472 [10:44:24<19:57:46, 3.29it/s] 36%|███▋ | 135171/371472 [10:44:25<20:25:37, 3.21it/s] 36%|███▋ | 135172/371472 [10:44:25<20:32:46, 3.19it/s] 36%|███▋ | 135173/371472 [10:44:25<20:39:48, 3.18it/s] 36%|███▋ | 135174/371472 [10:44:26<19:59:25, 3.28it/s] 36%|███▋ | 135175/371472 [10:44:26<19:18:31, 3.40it/s] 36%|███▋ | 135176/371472 [10:44:26<18:03:08, 3.64it/s] 36%|███▋ | 135177/371472 [10:44:26<17:24:16, 3.77it/s] 36%|███▋ | 135178/371472 [10:44:27<17:54:56, 3.66it/s] 36%|███▋ | 135179/371472 [10:44:27<17:50:48, 3.68it/s] 36%|███▋ | 135180/371472 [10:44:27<18:44:24, 3.50it/s] {'loss': 3.0849, 'learning_rate': 6.727951474929431e-07, 'epoch': 5.82} + 36%|███▋ | 135180/371472 [10:44:27<18:44:24, 3.50it/s] 36%|███▋ | 135181/371472 [10:44:28<18:21:20, 3.58it/s] 36%|███▋ | 135182/371472 [10:44:28<17:57:49, 3.65it/s] 36%|███▋ | 135183/371472 [10:44:28<17:06:51, 3.84it/s] 36%|███▋ | 135184/371472 [10:44:28<17:15:40, 3.80it/s] 36%|███▋ | 135185/371472 [10:44:29<17:28:27, 3.76it/s] 36%|███▋ | 135186/371472 [10:44:29<17:43:47, 3.70it/s] 36%|███▋ | 135187/371472 [10:44:29<17:45:36, 3.70it/s] 36%|███▋ | 135188/371472 [10:44:29<18:05:57, 3.63it/s] 36%|███▋ | 135189/371472 [10:44:30<17:36:56, 3.73it/s] 36%|███▋ | 135190/371472 [10:44:30<17:02:23, 3.85it/s] 36%|███▋ | 135191/371472 [10:44:30<16:42:30, 3.93it/s] 36%|███▋ | 135192/371472 [10:44:31<17:14:09, 3.81it/s] 36%|███▋ | 135193/371472 [10:44:31<17:09:45, 3.82it/s] 36%|███▋ | 135194/371472 [10:44:31<17:04:44, 3.84it/s] 36%|███▋ | 135195/371472 [10:44:31<17:47:27, 3.69it/s] 36%|███▋ | 135196/371472 [10:44:32<17:36:46, 3.73it/s] 36%|███▋ | 135197/371472 [10:44:32<17:51:17, 3.68it/s] 36%|███▋ | 135198/371472 [10:44:32<19:07:59, 3.43it/s] 36%|███▋ | 135199/371472 [10:44:32<18:29:23, 3.55it/s] 36%|███▋ | 135200/371472 [10:44:33<18:47:48, 3.49it/s] {'loss': 3.1667, 'learning_rate': 6.727466655174643e-07, 'epoch': 5.82} + 36%|███▋ | 135200/371472 [10:44:33<18:47:48, 3.49it/s] 36%|███▋ | 135201/371472 [10:44:33<19:00:22, 3.45it/s] 36%|███▋ | 135202/371472 [10:44:33<18:35:09, 3.53it/s] 36%|███▋ | 135203/371472 [10:44:34<17:38:43, 3.72it/s] 36%|███▋ | 135204/371472 [10:44:34<18:11:19, 3.61it/s] 36%|███▋ | 135205/371472 [10:44:34<19:01:19, 3.45it/s] 36%|███▋ | 135206/371472 [10:44:34<19:08:12, 3.43it/s] 36%|███▋ | 135207/371472 [10:44:35<19:17:12, 3.40it/s] 36%|███▋ | 135208/371472 [10:44:35<19:05:52, 3.44it/s] 36%|███▋ | 135209/371472 [10:44:35<20:02:09, 3.28it/s] 36%|███▋ | 135210/371472 [10:44:36<19:17:00, 3.40it/s] 36%|███▋ | 135211/371472 [10:44:36<18:26:46, 3.56it/s] 36%|███▋ | 135212/371472 [10:44:36<17:43:29, 3.70it/s] 36%|███▋ | 135213/371472 [10:44:36<17:43:17, 3.70it/s] 36%|███▋ | 135214/371472 [10:44:37<17:24:36, 3.77it/s] 36%|███▋ | 135215/371472 [10:44:37<18:04:39, 3.63it/s] 36%|███▋ | 135216/371472 [10:44:37<18:09:08, 3.62it/s] 36%|█��█▋ | 135217/371472 [10:44:37<17:23:58, 3.77it/s] 36%|███▋ | 135218/371472 [10:44:38<17:19:29, 3.79it/s] 36%|███▋ | 135219/371472 [10:44:38<18:03:01, 3.64it/s] 36%|███▋ | 135220/371472 [10:44:38<18:19:45, 3.58it/s] {'loss': 3.103, 'learning_rate': 6.726981835419854e-07, 'epoch': 5.82} + 36%|███▋ | 135220/371472 [10:44:38<18:19:45, 3.58it/s] 36%|███▋ | 135221/371472 [10:44:39<18:33:50, 3.54it/s] 36%|███▋ | 135222/371472 [10:44:39<19:54:14, 3.30it/s] 36%|███▋ | 135223/371472 [10:44:39<19:58:36, 3.29it/s] 36%|███▋ | 135224/371472 [10:44:40<19:11:00, 3.42it/s] 36%|███▋ | 135225/371472 [10:44:40<19:27:27, 3.37it/s] 36%|███▋ | 135226/371472 [10:44:40<18:59:39, 3.45it/s] 36%|███▋ | 135227/371472 [10:44:40<18:34:35, 3.53it/s] 36%|███▋ | 135228/371472 [10:44:41<19:47:02, 3.32it/s] 36%|███▋ | 135229/371472 [10:44:41<18:44:08, 3.50it/s] 36%|███▋ | 135230/371472 [10:44:41<17:44:43, 3.70it/s] 36%|███▋ | 135231/371472 [10:44:42<17:56:37, 3.66it/s] 36%|███▋ | 135232/371472 [10:44:42<17:19:59, 3.79it/s] 36%|███▋ | 135233/371472 [10:44:42<17:25:26, 3.77it/s] 36%|███▋ | 135234/371472 [10:44:42<17:41:23, 3.71it/s] 36%|███▋ | 135235/371472 [10:44:43<19:21:38, 3.39it/s] 36%|███▋ | 135236/371472 [10:44:43<19:36:52, 3.35it/s] 36%|███▋ | 135237/371472 [10:44:43<19:21:35, 3.39it/s] 36%|███▋ | 135238/371472 [10:44:44<20:33:14, 3.19it/s] 36%|███▋ | 135239/371472 [10:44:44<20:13:15, 3.25it/s] 36%|███▋ | 135240/371472 [10:44:44<18:46:39, 3.49it/s] {'loss': 3.0376, 'learning_rate': 6.726497015665064e-07, 'epoch': 5.83} + 36%|███▋ | 135240/371472 [10:44:44<18:46:39, 3.49it/s] 36%|███▋ | 135241/371472 [10:44:44<18:04:47, 3.63it/s] 36%|███▋ | 135242/371472 [10:44:45<17:33:19, 3.74it/s] 36%|███▋ | 135243/371472 [10:44:45<17:44:06, 3.70it/s] 36%|███▋ | 135244/371472 [10:44:45<17:16:50, 3.80it/s] 36%|███▋ | 135245/371472 [10:44:45<17:15:49, 3.80it/s] 36%|███▋ | 135246/371472 [10:44:46<17:06:01, 3.84it/s] 36%|███▋ | 135247/371472 [10:44:46<17:53:17, 3.67it/s] 36%|███▋ | 135248/371472 [10:44:46<21:25:11, 3.06it/s] 36%|███▋ | 135249/371472 [10:44:47<19:48:42, 3.31it/s] 36%|███▋ | 135250/371472 [10:44:47<18:39:55, 3.52it/s] 36%|███▋ | 135251/371472 [10:44:47<18:09:20, 3.61it/s] 36%|███▋ | 135252/371472 [10:44:47<17:37:35, 3.72it/s] 36%|███▋ | 135253/371472 [10:44:48<17:20:55, 3.78it/s] 36%|███▋ | 135254/371472 [10:44:48<18:24:16, 3.57it/s] 36%|███▋ | 135255/371472 [10:44:48<18:33:50, 3.53it/s] 36%|███▋ | 135256/371472 [10:44:49<18:46:13, 3.50it/s] 36%|███▋ | 135257/371472 [10:44:49<18:11:22, 3.61it/s] 36%|███▋ | 135258/371472 [10:44:49<17:45:49, 3.69it/s] 36%|███▋ | 135259/371472 [10:44:49<17:41:48, 3.71it/s] 36%|███▋ | 135260/371472 [10:44:50<19:00:32, 3.45it/s] {'loss': 3.2213, 'learning_rate': 6.726012195910275e-07, 'epoch': 5.83} + 36%|███▋ | 135260/371472 [10:44:50<19:00:32, 3.45it/s] 36%|███▋ | 135261/371472 [10:44:50<18:59:13, 3.46it/s] 36%|███▋ | 135262/371472 [10:44:50<18:55:10, 3.47it/s] 36%|███▋ | 135263/371472 [10:44:51<17:57:53, 3.65it/s] 36%|███▋ | 135264/371472 [10:44:51<17:46:13, 3.69it/s] 36%|███▋ | 135265/371472 [10:44:51<17:34:33, 3.73it/s] 36%|███▋ | 135266/371472 [10:44:51<17:50:06, 3.68it/s] 36%|███▋ | 135267/371472 [10:44:52<17:05:50, 3.84it/s] 36%|███▋ | 135268/371472 [10:44:52<17:23:10, 3.77it/s] 36%|███▋ | 135269/371472 [10:44:52<17:27:20, 3.76it/s] 36%|███▋ | 135270/371472 [10:44:52<17:13:05, 3.81it/s] 36%|███▋ | 135271/371472 [10:44:53<17:34:13, 3.73it/s] 36%|███▋ | 135272/371472 [10:44:53<17:57:01, 3.66it/s] 36%|███▋ | 135273/371472 [10:44:53<17:38:01, 3.72it/s] 36%|███▋ | 135274/371472 [10:44:53<17:55:48, 3.66it/s] 36%|███▋ | 135275/371472 [10:44:54<17:58:23, 3.65it/s] 36%|███▋ | 135276/371472 [10:44:54<17:45:36, 3.69it/s] 36%|███▋ | 135277/371472 [10:44:54<19:16:07, 3.40it/s] 36%|███▋ | 135278/371472 [10:44:55<18:39:12, 3.52it/s] 36%|███▋ | 135279/371472 [10:44:55<18:14:24, 3.60it/s] 36%|███▋ | 135280/371472 [10:44:55<18:02:09, 3.64it/s] {'loss': 3.1859, 'learning_rate': 6.725527376155487e-07, 'epoch': 5.83} + 36%|███▋ | 135280/371472 [10:44:55<18:02:09, 3.64it/s] 36%|███▋ | 135281/371472 [10:44:55<17:40:49, 3.71it/s] 36%|███▋ | 135282/371472 [10:44:56<17:37:28, 3.72it/s] 36%|███▋ | 135283/371472 [10:44:56<17:46:34, 3.69it/s] 36%|███▋ | 135284/371472 [10:44:56<18:56:50, 3.46it/s] 36%|███▋ | 135285/371472 [10:44:57<19:19:04, 3.40it/s] 36%|███▋ | 135286/371472 [10:44:57<18:55:52, 3.47it/s] 36%|███▋ | 135287/371472 [10:44:57<18:13:25, 3.60it/s] 36%|███▋ | 135288/371472 [10:44:57<18:44:25, 3.50it/s] 36%|███▋ | 135289/371472 [10:44:58<17:56:35, 3.66it/s] 36%|███▋ | 135290/371472 [10:44:58<17:58:13, 3.65it/s] 36%|███▋ | 135291/371472 [10:44:58<18:00:28, 3.64it/s] 36%|███▋ | 135292/371472 [10:44:58<17:51:42, 3.67it/s] 36%|███▋ | 135293/371472 [10:44:59<17:23:14, 3.77it/s] 36%|███▋ | 135294/371472 [10:44:59<17:35:24, 3.73it/s] 36%|███▋ | 135295/371472 [10:44:59<18:34:05, 3.53it/s] 36%|███▋ | 135296/371472 [10:45:00<17:57:04, 3.65it/s] 36%|███▋ | 135297/371472 [10:45:00<18:05:53, 3.62it/s] 36%|███▋ | 135298/371472 [10:45:00<19:07:50, 3.43it/s] 36%|███▋ | 135299/371472 [10:45:00<18:29:33, 3.55it/s] 36%|███▋ | 135300/371472 [10:45:01<19:51:47, 3.30it/s] {'loss': 3.0732, 'learning_rate': 6.725042556400698e-07, 'epoch': 5.83} + 36%|███▋ | 135300/371472 [10:45:01<19:51:47, 3.30it/s] 36%|███▋ | 135301/371472 [10:45:01<18:37:28, 3.52it/s] 36%|███▋ | 135302/371472 [10:45:01<18:06:43, 3.62it/s] 36%|███▋ | 135303/371472 [10:45:02<19:31:44, 3.36it/s] 36%|███▋ | 135304/371472 [10:45:02<19:14:53, 3.41it/s] 36%|███▋ | 135305/371472 [10:45:02<20:48:38, 3.15it/s] 36%|███▋ | 135306/371472 [10:45:03<20:33:14, 3.19it/s] 36%|███▋ | 135307/371472 [10:45:03<19:21:18, 3.39it/s] 36%|███▋ | 135308/371472 [10:45:03<18:19:27, 3.58it/s] 36%|███▋ | 135309/371472 [10:45:03<17:43:51, 3.70it/s] 36%|███▋ | 135310/371472 [10:45:04<19:12:44, 3.41it/s] 36%|███▋ | 135311/371472 [10:45:04<18:31:00, 3.54it/s] 36%|███▋ | 135312/371472 [10:45:04<18:24:35, 3.56it/s] 36%|███▋ | 135313/371472 [10:45:04<18:30:01, 3.55it/s] 36%|███▋ | 135314/371472 [10:45:05<17:49:31, 3.68it/s] 36%|███▋ | 135315/371472 [10:45:05<17:01:00, 3.85it/s] 36%|███▋ | 135316/371472 [10:45:05<17:32:30, 3.74it/s] 36%|███▋ | 135317/371472 [10:45:06<17:51:44, 3.67it/s] 36%|███▋ | 135318/371472 [10:45:06<17:32:08, 3.74it/s] 36%|███▋ | 135319/371472 [10:45:06<17:48:42, 3.68it/s] 36%|███▋ | 135320/371472 [10:45:06<19:02:09, 3.45it/s] {'loss': 3.0541, 'learning_rate': 6.724557736645908e-07, 'epoch': 5.83} + 36%|███▋ | 135320/371472 [10:45:06<19:02:09, 3.45it/s] 36%|███▋ | 135321/371472 [10:45:07<18:47:29, 3.49it/s] 36%|███▋ | 135322/371472 [10:45:07<19:51:44, 3.30it/s] 36%|███▋ | 135323/371472 [10:45:07<19:04:48, 3.44it/s] 36%|███▋ | 135324/371472 [10:45:08<19:50:33, 3.31it/s] 36%|███▋ | 135325/371472 [10:45:08<18:56:14, 3.46it/s] 36%|███▋ | 135326/371472 [10:45:08<18:36:41, 3.52it/s] 36%|███▋ | 135327/371472 [10:45:08<18:09:31, 3.61it/s] 36%|███▋ | 135328/371472 [10:45:09<17:47:08, 3.69it/s] 36%|███▋ | 135329/371472 [10:45:09<18:43:36, 3.50it/s] 36%|███▋ | 135330/371472 [10:45:09<18:38:54, 3.52it/s] 36%|███▋ | 135331/371472 [10:45:10<18:47:59, 3.49it/s] 36%|███▋ | 135332/371472 [10:45:10<18:29:06, 3.55it/s] 36%|███▋ | 135333/371472 [10:45:10<18:00:05, 3.64it/s] 36%|███▋ | 135334/371472 [10:45:10<18:32:18, 3.54it/s] 36%|███▋ | 135335/371472 [10:45:11<19:10:38, 3.42it/s] 36%|███▋ | 135336/371472 [10:45:11<18:26:23, 3.56it/s] 36%|███▋ | 135337/371472 [10:45:11<18:08:14, 3.62it/s] 36%|███▋ | 135338/371472 [10:45:11<17:29:02, 3.75it/s] 36%|███▋ | 135339/371472 [10:45:12<17:50:15, 3.68it/s] 36%|███▋ | 135340/371472 [10:45:12<17:27:24, 3.76it/s] {'loss': 3.1965, 'learning_rate': 6.72407291689112e-07, 'epoch': 5.83} + 36%|███▋ | 135340/371472 [10:45:12<17:27:24, 3.76it/s] 36%|███▋ | 135341/371472 [10:45:12<17:20:51, 3.78it/s] 36%|███▋ | 135342/371472 [10:45:13<18:58:37, 3.46it/s] 36%|███▋ | 135343/371472 [10:45:13<18:06:17, 3.62it/s] 36%|███▋ | 135344/371472 [10:45:13<17:24:26, 3.77it/s] 36%|███▋ | 135345/371472 [10:45:13<17:06:34, 3.83it/s] 36%|███▋ | 135346/371472 [10:45:14<17:27:18, 3.76it/s] 36%|███▋ | 135347/371472 [10:45:14<17:35:30, 3.73it/s] 36%|███▋ | 135348/371472 [10:45:14<17:27:49, 3.76it/s] 36%|███▋ | 135349/371472 [10:45:14<17:22:15, 3.78it/s] 36%|███▋ | 135350/371472 [10:45:15<17:57:01, 3.65it/s] 36%|███▋ | 135351/371472 [10:45:15<18:08:33, 3.62it/s] 36%|███▋ | 135352/371472 [10:45:15<18:41:13, 3.51it/s] 36%|███▋ | 135353/371472 [10:45:16<18:55:33, 3.47it/s] 36%|███▋ | 135354/371472 [10:45:16<18:15:19, 3.59it/s] 36%|███▋ | 135355/371472 [10:45:16<18:04:30, 3.63it/s] 36%|███▋ | 135356/371472 [10:45:16<17:27:50, 3.76it/s] 36%|███▋ | 135357/371472 [10:45:17<18:03:04, 3.63it/s] 36%|███▋ | 135358/371472 [10:45:17<17:41:28, 3.71it/s] 36%|███▋ | 135359/371472 [10:45:17<18:32:00, 3.54it/s] 36%|███▋ | 135360/371472 [10:45:17<17:55:24, 3.66it/s] {'loss': 3.1839, 'learning_rate': 6.723588097136332e-07, 'epoch': 5.83} + 36%|███▋ | 135360/371472 [10:45:17<17:55:24, 3.66it/s] 36%|███▋ | 135361/371472 [10:45:18<18:01:00, 3.64it/s] 36%|███▋ | 135362/371472 [10:45:18<18:36:52, 3.52it/s] 36%|███▋ | 135363/371472 [10:45:18<18:43:32, 3.50it/s] 36%|███▋ | 135364/371472 [10:45:19<17:50:45, 3.68it/s] 36%|███▋ | 135365/371472 [10:45:19<17:13:36, 3.81it/s] 36%|███▋ | 135366/371472 [10:45:19<18:14:08, 3.60it/s] 36%|███▋ | 135367/371472 [10:45:19<18:19:32, 3.58it/s] 36%|███▋ | 135368/371472 [10:45:20<18:31:44, 3.54it/s] 36%|███▋ | 135369/371472 [10:45:20<17:29:44, 3.75it/s] 36%|███▋ | 135370/371472 [10:45:20<18:18:47, 3.58it/s] 36%|███▋ | 135371/371472 [10:45:21<18:06:54, 3.62it/s] 36%|███▋ | 135372/371472 [10:45:21<17:46:49, 3.69it/s] 36%|███▋ | 135373/371472 [10:45:21<18:54:06, 3.47it/s] 36%|███▋ | 135374/371472 [10:45:21<18:58:32, 3.46it/s] 36%|███▋ | 135375/371472 [10:45:22<18:31:52, 3.54it/s] 36%|███▋ | 135376/371472 [10:45:22<19:38:03, 3.34it/s] 36%|███▋ | 135377/371472 [10:45:22<19:44:21, 3.32it/s] 36%|███▋ | 135378/371472 [10:45:23<19:38:23, 3.34it/s] 36%|███▋ | 135379/371472 [10:45:23<18:42:15, 3.51it/s] 36%|███▋ | 135380/371472 [10:45:23<17:30:48, 3.74it/s] {'loss': 3.1495, 'learning_rate': 6.723103277381542e-07, 'epoch': 5.83} + 36%|███▋ | 135380/371472 [10:45:23<17:30:48, 3.74it/s] 36%|███▋ | 135381/371472 [10:45:24<21:16:44, 3.08it/s] 36%|███▋ | 135382/371472 [10:45:24<20:22:10, 3.22it/s] 36%|███▋ | 135383/371472 [10:45:24<19:12:25, 3.41it/s] 36%|███▋ | 135384/371472 [10:45:24<18:08:25, 3.62it/s] 36%|███▋ | 135385/371472 [10:45:25<17:34:03, 3.73it/s] 36%|███▋ | 135386/371472 [10:45:25<18:15:18, 3.59it/s] 36%|███▋ | 135387/371472 [10:45:25<18:46:30, 3.49it/s] 36%|███▋ | 135388/371472 [10:45:25<19:09:40, 3.42it/s] 36%|███▋ | 135389/371472 [10:45:26<19:48:34, 3.31it/s] 36%|███▋ | 135390/371472 [10:45:26<20:35:52, 3.18it/s] 36%|███▋ | 135391/371472 [10:45:26<19:13:36, 3.41it/s] 36%|███▋ | 135392/371472 [10:45:27<18:54:38, 3.47it/s] 36%|███▋ | 135393/371472 [10:45:27<18:37:21, 3.52it/s] 36%|███▋ | 135394/371472 [10:45:27<17:59:06, 3.65it/s] 36%|███▋ | 135395/371472 [10:45:27<17:54:59, 3.66it/s] 36%|███▋ | 135396/371472 [10:45:28<17:37:47, 3.72it/s] 36%|███▋ | 135397/371472 [10:45:28<18:17:23, 3.59it/s] 36%|███▋ | 135398/371472 [10:45:28<17:53:48, 3.66it/s] 36%|███▋ | 135399/371472 [10:45:29<18:02:41, 3.63it/s] 36%|███▋ | 135400/371472 [10:45:29<18:11:31, 3.60it/s] {'loss': 3.0962, 'learning_rate': 6.722618457626752e-07, 'epoch': 5.83} + 36%|███▋ | 135400/371472 [10:45:29<18:11:31, 3.60it/s] 36%|███▋ | 135401/371472 [10:45:29<17:37:37, 3.72it/s] 36%|███▋ | 135402/371472 [10:45:29<17:04:34, 3.84it/s] 36%|███▋ | 135403/371472 [10:45:30<17:30:22, 3.75it/s] 36%|███▋ | 135404/371472 [10:45:30<17:32:05, 3.74it/s] 36%|███▋ | 135405/371472 [10:45:30<17:58:19, 3.65it/s] 36%|███▋ | 135406/371472 [10:45:31<21:07:42, 3.10it/s] 36%|██���▋ | 135407/371472 [10:45:31<21:11:26, 3.09it/s] 36%|███▋ | 135408/371472 [10:45:31<21:04:11, 3.11it/s] 36%|███▋ | 135409/371472 [10:45:32<19:44:23, 3.32it/s] 36%|███▋ | 135410/371472 [10:45:32<19:23:04, 3.38it/s] 36%|███▋ | 135411/371472 [10:45:32<19:31:11, 3.36it/s] 36%|███▋ | 135412/371472 [10:45:32<18:53:49, 3.47it/s] 36%|███▋ | 135413/371472 [10:45:33<19:15:04, 3.41it/s] 36%|███▋ | 135414/371472 [10:45:33<18:09:15, 3.61it/s] 36%|███▋ | 135415/371472 [10:45:33<18:14:27, 3.59it/s] 36%|███▋ | 135416/371472 [10:45:34<19:15:03, 3.41it/s] 36%|███▋ | 135417/371472 [10:45:34<18:21:38, 3.57it/s] 36%|███▋ | 135418/371472 [10:45:34<18:41:16, 3.51it/s] 36%|███▋ | 135419/371472 [10:45:34<18:46:32, 3.49it/s] 36%|███▋ | 135420/371472 [10:45:35<19:26:01, 3.37it/s] {'loss': 3.239, 'learning_rate': 6.722133637871964e-07, 'epoch': 5.83} + 36%|███▋ | 135420/371472 [10:45:35<19:26:01, 3.37it/s] 36%|███▋ | 135421/371472 [10:45:35<19:13:14, 3.41it/s] 36%|███▋ | 135422/371472 [10:45:35<18:19:19, 3.58it/s] 36%|███▋ | 135423/371472 [10:45:36<19:42:29, 3.33it/s] 36%|███▋ | 135424/371472 [10:45:36<19:22:02, 3.39it/s] 36%|███▋ | 135425/371472 [10:45:36<18:45:42, 3.49it/s] 36%|███▋ | 135426/371472 [10:45:36<17:52:41, 3.67it/s] 36%|███▋ | 135427/371472 [10:45:37<17:59:29, 3.64it/s] 36%|███▋ | 135428/371472 [10:45:37<19:07:03, 3.43it/s] 36%|███▋ | 135429/371472 [10:45:37<19:21:37, 3.39it/s] 36%|███▋ | 135430/371472 [10:45:38<20:01:47, 3.27it/s] 36%|███▋ | 135431/371472 [10:45:38<19:11:29, 3.42it/s] 36%|███▋ | 135432/371472 [10:45:38<19:05:24, 3.43it/s] 36%|███▋ | 135433/371472 [10:45:38<18:29:14, 3.55it/s] 36%|███▋ | 135434/371472 [10:45:39<18:38:18, 3.52it/s] 36%|███▋ | 135435/371472 [10:45:39<19:12:04, 3.41it/s] 36%|███▋ | 135436/371472 [10:45:39<19:48:25, 3.31it/s] 36%|███▋ | 135437/371472 [10:45:40<19:35:08, 3.35it/s] 36%|███▋ | 135438/371472 [10:45:40<18:44:11, 3.50it/s] 36%|███▋ | 135439/371472 [10:45:40<19:23:41, 3.38it/s] 36%|███▋ | 135440/371472 [10:45:41<19:45:27, 3.32it/s] {'loss': 3.0408, 'learning_rate': 6.721648818117176e-07, 'epoch': 5.83} + 36%|███▋ | 135440/371472 [10:45:41<19:45:27, 3.32it/s] 36%|███▋ | 135441/371472 [10:45:41<18:49:15, 3.48it/s] 36%|███▋ | 135442/371472 [10:45:41<19:22:33, 3.38it/s] 36%|███▋ | 135443/371472 [10:45:41<18:11:22, 3.60it/s] 36%|███▋ | 135444/371472 [10:45:42<18:56:31, 3.46it/s] 36%|███▋ | 135445/371472 [10:45:42<19:10:39, 3.42it/s] 36%|███▋ | 135446/371472 [10:45:42<20:38:16, 3.18it/s] 36%|███▋ | 135447/371472 [10:45:43<19:11:17, 3.42it/s] 36%|███▋ | 135448/371472 [10:45:43<18:23:42, 3.56it/s] 36%|███▋ | 135449/371472 [10:45:43<18:30:59, 3.54it/s] 36%|███▋ | 135450/371472 [10:45:43<18:32:09, 3.54it/s] 36%|███▋ | 135451/371472 [10:45:44<19:02:26, 3.44it/s] 36%|███▋ | 135452/371472 [10:45:44<20:15:18, 3.24it/s] 36%|███▋ | 135453/371472 [10:45:44<19:35:41, 3.35it/s] 36%|███▋ | 135454/371472 [10:45:45<19:14:48, 3.41it/s] 36%|███▋ | 135455/371472 [10:45:45<21:10:54, 3.10it/s] 36%|███▋ | 135456/371472 [10:45:45<21:07:43, 3.10it/s] 36%|███▋ | 135457/371472 [10:45:46<19:50:11, 3.31it/s] 36%|███▋ | 135458/371472 [10:45:46<18:48:10, 3.49it/s] 36%|███▋ | 135459/371472 [10:45:46<19:12:30, 3.41it/s] 36%|███▋ | 135460/371472 [10:45:46<19:06:40, 3.43it/s] {'loss': 3.0296, 'learning_rate': 6.721163998362387e-07, 'epoch': 5.83} + 36%|███▋ | 135460/371472 [10:45:46<19:06:40, 3.43it/s] 36%|███▋ | 135461/371472 [10:45:47<19:01:17, 3.45it/s] 36%|███▋ | 135462/371472 [10:45:47<19:13:10, 3.41it/s] 36%|███▋ | 135463/371472 [10:45:47<18:28:23, 3.55it/s] 36%|███▋ | 135464/371472 [10:45:47<17:49:20, 3.68it/s] 36%|███▋ | 135465/371472 [10:45:48<17:34:17, 3.73it/s] 36%|███▋ | 135466/371472 [10:45:48<17:15:29, 3.80it/s] 36%|███▋ | 135467/371472 [10:45:48<18:12:50, 3.60it/s] 36%|███▋ | 135468/371472 [10:45:49<17:55:53, 3.66it/s] 36%|███▋ | 135469/371472 [10:45:49<18:10:42, 3.61it/s] 36%|███▋ | 135470/371472 [10:45:49<18:32:54, 3.53it/s] 36%|███▋ | 135471/371472 [10:45:50<20:54:32, 3.14it/s] 36%|███▋ | 135472/371472 [10:45:50<21:10:10, 3.10it/s] 36%|███▋ | 135473/371472 [10:45:50<20:31:13, 3.19it/s] 36%|███▋ | 135474/371472 [10:45:50<19:51:02, 3.30it/s] 36%|███▋ | 135475/371472 [10:45:51<19:37:07, 3.34it/s] 36%|███▋ | 135476/371472 [10:45:51<20:10:16, 3.25it/s] 36%|███▋ | 135477/371472 [10:45:51<19:06:56, 3.43it/s] 36%|███▋ | 135478/371472 [10:45:52<19:09:53, 3.42it/s] 36%|███▋ | 135479/371472 [10:45:52<19:22:34, 3.38it/s] 36%|███▋ | 135480/371472 [10:45:52<22:39:23, 2.89it/s] {'loss': 3.2457, 'learning_rate': 6.720679178607597e-07, 'epoch': 5.84} + 36%|███▋ | 135480/371472 [10:45:52<22:39:23, 2.89it/s] 36%|███▋ | 135481/371472 [10:45:53<21:21:49, 3.07it/s] 36%|███▋ | 135482/371472 [10:45:53<20:34:04, 3.19it/s] 36%|███▋ | 135483/371472 [10:45:53<19:20:58, 3.39it/s] 36%|███▋ | 135484/371472 [10:45:53<18:18:36, 3.58it/s] 36%|███▋ | 135485/371472 [10:45:54<17:52:35, 3.67it/s] 36%|███▋ | 135486/371472 [10:45:54<17:50:54, 3.67it/s] 36%|███▋ | 135487/371472 [10:45:54<18:09:25, 3.61it/s] 36%|███▋ | 135488/371472 [10:45:55<18:25:55, 3.56it/s] 36%|███▋ | 135489/371472 [10:45:55<18:14:28, 3.59it/s] 36%|███▋ | 135490/371472 [10:45:55<17:54:18, 3.66it/s] 36%|███▋ | 135491/371472 [10:45:55<18:04:28, 3.63it/s] 36%|███▋ | 135492/371472 [10:45:56<17:40:55, 3.71it/s] 36%|███▋ | 135493/371472 [10:45:56<17:27:55, 3.75it/s] 36%|███▋ | 135494/371472 [10:45:56<18:44:38, 3.50it/s] 36%|███▋ | 135495/371472 [10:45:56<18:32:38, 3.53it/s] 36%|███▋ | 135496/371472 [10:45:57<19:16:00, 3.40it/s] 36%|███▋ | 135497/371472 [10:45:57<18:32:42, 3.53it/s] 36%|███▋ | 135498/371472 [10:45:57<18:18:39, 3.58it/s] 36%|███▋ | 135499/371472 [10:45:58<17:46:20, 3.69it/s] 36%|███▋ | 135500/371472 [10:45:58<17:55:19, 3.66it/s] {'loss': 3.1288, 'learning_rate': 6.720194358852809e-07, 'epoch': 5.84} + 36%|███▋ | 135500/371472 [10:45:58<17:55:19, 3.66it/s] 36%|███▋ | 135501/371472 [10:45:58<18:32:34, 3.53it/s] 36%|███▋ | 135502/371472 [10:45:58<18:12:23, 3.60it/s] 36%|███▋ | 135503/371472 [10:45:59<18:48:02, 3.49it/s] 36%|███▋ | 135504/371472 [10:45:59<17:53:58, 3.66it/s] 36%|███▋ | 135505/371472 [10:45:59<17:08:53, 3.82it/s] 36%|███▋ | 135506/371472 [10:45:59<16:42:48, 3.92it/s] 36%|███▋ | 135507/371472 [10:46:00<16:36:52, 3.95it/s] 36%|███▋ | 135508/371472 [10:46:00<17:22:18, 3.77it/s] 36%|███▋ | 135509/371472 [10:46:00<16:40:12, 3.93it/s] 36%|███▋ | 135510/371472 [10:46:01<16:51:26, 3.89it/s] 36%|███▋ | 135511/371472 [10:46:01<16:29:32, 3.97it/s] 36%|███▋ | 135512/371472 [10:46:01<16:37:25, 3.94it/s] 36%|███▋ | 135513/371472 [10:46:01<16:21:09, 4.01it/s] 36%|███▋ | 135514/371472 [10:46:02<18:26:49, 3.55it/s] 36%|███▋ | 135515/371472 [10:46:02<18:44:06, 3.50it/s] 36%|███▋ | 135516/371472 [10:46:02<20:30:14, 3.20it/s] 36%|███▋ | 135517/371472 [10:46:03<19:36:16, 3.34it/s] 36%|███▋ | 135518/371472 [10:46:03<20:46:00, 3.16it/s] 36%|███▋ | 135519/371472 [10:46:03<19:14:07, 3.41it/s] 36%|███▋ | 135520/371472 [10:46:03<19:01:18, 3.45it/s] {'loss': 3.1188, 'learning_rate': 6.71970953909802e-07, 'epoch': 5.84} + 36%|███▋ | 135520/371472 [10:46:03<19:01:18, 3.45it/s] 36%|███▋ | 135521/371472 [10:46:04<18:38:56, 3.51it/s] 36%|███▋ | 135522/371472 [10:46:04<18:39:14, 3.51it/s] 36%|███▋ | 135523/371472 [10:46:04<17:46:47, 3.69it/s] 36%|███▋ | 135524/371472 [10:46:04<17:48:50, 3.68it/s] 36%|███▋ | 135525/371472 [10:46:05<17:59:46, 3.64it/s] 36%|███▋ | 135526/371472 [10:46:05<17:27:57, 3.75it/s] 36%|███▋ | 135527/371472 [10:46:05<17:14:56, 3.80it/s] 36%|███▋ | 135528/371472 [10:46:06<17:08:18, 3.82it/s] 36%|███▋ | 135529/371472 [10:46:06<17:17:03, 3.79it/s] 36%|███▋ | 135530/371472 [10:46:06<17:58:22, 3.65it/s] 36%|███▋ | 135531/371472 [10:46:06<17:31:11, 3.74it/s] 36%|███▋ | 135532/371472 [10:46:07<17:16:26, 3.79it/s] 36%|███▋ | 135533/371472 [10:46:07<16:37:58, 3.94it/s] 36%|███▋ | 135534/371472 [10:46:07<16:54:59, 3.87it/s] 36%|███▋ | 135535/371472 [10:46:07<17:05:26, 3.83it/s] 36%|███▋ | 135536/371472 [10:46:08<17:02:16, 3.85it/s] 36%|███▋ | 135537/371472 [10:46:08<17:18:13, 3.79it/s] 36%|███▋ | 135538/371472 [10:46:08<17:18:40, 3.79it/s] 36%|███▋ | 135539/371472 [10:46:08<17:39:34, 3.71it/s] 36%|███▋ | 135540/371472 [10:46:09<18:58:46, 3.45it/s] {'loss': 3.1655, 'learning_rate': 6.71922471934323e-07, 'epoch': 5.84} + 36%|███▋ | 135540/371472 [10:46:09<18:58:46, 3.45it/s] 36%|███▋ | 135541/371472 [10:46:09<18:02:35, 3.63it/s] 36%|███▋ | 135542/371472 [10:46:09<18:05:28, 3.62it/s] 36%|███▋ | 135543/371472 [10:46:10<18:12:57, 3.60it/s] 36%|███▋ | 135544/371472 [10:46:10<17:22:40, 3.77it/s] 36%|███▋ | 135545/371472 [10:46:10<18:23:29, 3.56it/s] 36%|███▋ | 135546/371472 [10:46:10<19:54:48, 3.29it/s] 36%|███▋ | 135547/371472 [10:46:11<19:27:07, 3.37it/s] 36%|███▋ | 135548/371472 [10:46:11<19:57:10, 3.28it/s] 36%|███▋ | 135549/371472 [10:46:11<19:27:15, 3.37it/s] 36%|███▋ | 135550/371472 [10:46:12<19:44:20, 3.32it/s] 36%|███▋ | 135551/371472 [10:46:12<19:23:52, 3.38it/s] 36%|███▋ | 135552/371472 [10:46:12<18:47:13, 3.49it/s] 36%|███▋ | 135553/371472 [10:46:12<17:38:34, 3.71it/s] 36%|███▋ | 135554/371472 [10:46:13<17:52:46, 3.67it/s] 36%|███▋ | 135555/371472 [10:46:13<17:57:32, 3.65it/s] 36%|███▋ | 135556/371472 [10:46:13<17:51:51, 3.67it/s] 36%|███▋ | 135557/371472 [10:46:14<19:25:31, 3.37it/s] 36%|███▋ | 135558/371472 [10:46:14<19:27:29, 3.37it/s] 36%|███▋ | 135559/371472 [10:46:14<19:35:20, 3.35it/s] 36%|███▋ | 135560/371472 [10:46:15<19:40:47, 3.33it/s] {'loss': 3.1493, 'learning_rate': 6.718739899588441e-07, 'epoch': 5.84} + 36%|███▋ | 135560/371472 [10:46:15<19:40:47, 3.33it/s] 36%|███▋ | 135561/371472 [10:46:15<18:25:37, 3.56it/s] 36%|███▋ | 135562/371472 [10:46:15<18:19:01, 3.58it/s] 36%|███▋ | 135563/371472 [10:46:15<19:11:18, 3.42it/s] 36%|███▋ | 135564/371472 [10:46:16<18:33:47, 3.53it/s] 36%|███▋ | 135565/371472 [10:46:16<18:03:25, 3.63it/s] 36%|███▋ | 135566/371472 [10:46:16<18:44:17, 3.50it/s] 36%|███▋ | 135567/371472 [10:46:16<18:34:40, 3.53it/s] 36%|███▋ | 135568/371472 [10:46:17<18:24:33, 3.56it/s] 36%|███▋ | 135569/371472 [10:46:17<17:26:22, 3.76it/s] 36%|███▋ | 135570/371472 [10:46:17<17:44:47, 3.69it/s] 36%|███▋ | 135571/371472 [10:46:18<18:03:19, 3.63it/s] 36%|███▋ | 135572/371472 [10:46:18<18:47:49, 3.49it/s] 36%|███▋ | 135573/371472 [10:46:18<19:00:48, 3.45it/s] 36%|███▋ | 135574/371472 [10:46:19<21:00:36, 3.12it/s] 36%|███▋ | 135575/371472 [10:46:19<19:59:27, 3.28it/s] 36%|███▋ | 135576/371472 [10:46:19<19:11:01, 3.42it/s] 36%|███▋ | 135577/371472 [10:46:19<18:44:30, 3.50it/s] 36%|███▋ | 135578/371472 [10:46:20<19:41:11, 3.33it/s] 36%|███▋ | 135579/371472 [10:46:20<18:48:23, 3.48it/s] 36%|███▋ | 135580/371472 [10:46:20<18:20:49, 3.57it/s] {'loss': 3.1491, 'learning_rate': 6.718255079833652e-07, 'epoch': 5.84} + 36%|███▋ | 135580/371472 [10:46:20<18:20:49, 3.57it/s] 36%|███▋ | 135581/371472 [10:46:21<18:12:19, 3.60it/s] 36%|███▋ | 135582/371472 [10:46:21<17:32:57, 3.73it/s] 36%|███▋ | 135583/371472 [10:46:21<17:59:57, 3.64it/s] 36%|███▋ | 135584/371472 [10:46:21<17:39:58, 3.71it/s] 36%|███▋ | 135585/371472 [10:46:22<20:08:04, 3.25it/s] 36%|███▋ | 135586/371472 [10:46:22<18:53:46, 3.47it/s] 36%|███▋ | 135587/371472 [10:46:22<18:36:26, 3.52it/s] 37%|███▋ | 135588/371472 [10:46:22<17:43:19, 3.70it/s] 37%|███▋ | 135589/371472 [10:46:23<18:24:13, 3.56it/s] 37%|███▋ | 135590/371472 [10:46:23<18:15:31, 3.59it/s] 37%|███▋ | 135591/371472 [10:46:23<17:29:07, 3.75it/s] 37%|███▋ | 135592/371472 [10:46:24<17:05:24, 3.83it/s] 37%|███▋ | 135593/371472 [10:46:24<17:55:49, 3.65it/s] 37%|███▋ | 135594/371472 [10:46:24<17:46:28, 3.69it/s] 37%|███▋ | 135595/371472 [10:46:24<17:04:59, 3.84it/s] 37%|███▋ | 135596/371472 [10:46:25<16:59:50, 3.85it/s] 37%|███▋ | 135597/371472 [10:46:25<16:43:50, 3.92it/s] 37%|███▋ | 135598/371472 [10:46:25<16:45:39, 3.91it/s] 37%|███▋ | 135599/371472 [10:46:25<17:23:08, 3.77it/s] 37%|███▋ | 135600/371472 [10:46:26<17:34:26, 3.73it/s] {'loss': 2.9958, 'learning_rate': 6.717770260078865e-07, 'epoch': 5.84} + 37%|███▋ | 135600/371472 [10:46:26<17:34:26, 3.73it/s] 37%|███▋ | 135601/371472 [10:46:26<18:04:56, 3.62it/s] 37%|███▋ | 135602/371472 [10:46:26<17:18:06, 3.79it/s] 37%|███▋ | 135603/371472 [10:46:26<16:59:15, 3.86it/s] 37%|███▋ | 135604/371472 [10:46:27<17:21:27, 3.77it/s] 37%|███▋ | 135605/371472 [10:46:27<17:04:47, 3.84it/s] 37%|███▋ | 135606/371472 [10:46:27<17:23:06, 3.77it/s] 37%|███▋ | 135607/371472 [10:46:27<17:02:26, 3.84it/s] 37%|███▋ | 135608/371472 [10:46:28<17:48:37, 3.68it/s] 37%|███▋ | 135609/371472 [10:46:28<19:21:36, 3.38it/s] 37%|███▋ | 135610/371472 [10:46:28<19:45:13, 3.32it/s] 37%|███▋ | 135611/371472 [10:46:29<19:26:55, 3.37it/s] 37%|███▋ | 135612/371472 [10:46:29<20:25:28, 3.21it/s] 37%|███▋ | 135613/371472 [10:46:29<21:37:42, 3.03it/s] 37%|███▋ | 135614/371472 [10:46:30<21:16:16, 3.08it/s] 37%|███▋ | 135615/371472 [10:46:30<20:10:27, 3.25it/s] 37%|███▋ | 135616/371472 [10:46:30<19:03:33, 3.44it/s] 37%|███▋ | 135617/371472 [10:46:31<18:36:19, 3.52it/s] 37%|███▋ | 135618/371472 [10:46:31<19:19:46, 3.39it/s] 37%|███▋ | 135619/371472 [10:46:31<18:36:17, 3.52it/s] 37%|███▋ | 135620/371472 [10:46:31<18:10:17, 3.61it/s] {'loss': 3.2048, 'learning_rate': 6.717285440324075e-07, 'epoch': 5.84} + 37%|███▋ | 135620/371472 [10:46:31<18:10:17, 3.61it/s] 37%|███▋ | 135621/371472 [10:46:32<17:57:42, 3.65it/s] 37%|███▋ | 135622/371472 [10:46:32<17:16:00, 3.79it/s] 37%|███▋ | 135623/371472 [10:46:32<18:00:13, 3.64it/s] 37%|███▋ | 135624/371472 [10:46:32<17:33:42, 3.73it/s] 37%|███▋ | 135625/371472 [10:46:33<18:00:18, 3.64it/s] 37%|███▋ | 135626/371472 [10:46:33<17:20:48, 3.78it/s] 37%|███▋ | 135627/371472 [10:46:33<17:18:19, 3.79it/s] 37%|███▋ | 135628/371472 [10:46:33<17:07:33, 3.83it/s] 37%|███▋ | 135629/371472 [10:46:34<16:43:10, 3.92it/s] 37%|███▋ | 135630/371472 [10:46:34<17:11:59, 3.81it/s] 37%|███▋ | 135631/371472 [10:46:34<17:27:05, 3.75it/s] 37%|███▋ | 135632/371472 [10:46:35<18:37:16, 3.52it/s] 37%|███▋ | 135633/371472 [10:46:35<19:18:03, 3.39it/s] 37%|███▋ | 135634/371472 [10:46:35<18:14:34, 3.59it/s] 37%|███▋ | 135635/371472 [10:46:35<18:52:40, 3.47it/s] 37%|███▋ | 135636/371472 [10:46:36<17:55:14, 3.66it/s] 37%|███▋ | 135637/371472 [10:46:36<17:46:57, 3.68it/s] 37%|███▋ | 135638/371472 [10:46:36<18:05:03, 3.62it/s] 37%|███▋ | 135639/371472 [10:46:37<17:08:49, 3.82it/s] 37%|███▋ | 135640/371472 [10:46:37<18:03:10, 3.63it/s] {'loss': 3.1827, 'learning_rate': 6.716800620569285e-07, 'epoch': 5.84} + 37%|███▋ | 135640/371472 [10:46:37<18:03:10, 3.63it/s] 37%|███▋ | 135641/371472 [10:46:37<19:36:25, 3.34it/s] 37%|███▋ | 135642/371472 [10:46:37<19:06:02, 3.43it/s] 37%|███▋ | 135643/371472 [10:46:38<18:35:56, 3.52it/s] 37%|███▋ | 135644/371472 [10:46:38<17:48:24, 3.68it/s] 37%|███▋ | 135645/371472 [10:46:38<17:32:00, 3.74it/s] 37%|███▋ | 135646/371472 [10:46:39<19:53:57, 3.29it/s] 37%|███▋ | 135647/371472 [10:46:39<18:40:27, 3.51it/s] 37%|███▋ | 135648/371472 [10:46:39<19:05:28, 3.43it/s] 37%|███▋ | 135649/371472 [10:46:39<19:51:46, 3.30it/s] 37%|███▋ | 135650/371472 [10:46:40<19:24:25, 3.38it/s] 37%|███▋ | 135651/371472 [10:46:40<18:57:22, 3.46it/s] 37%|███▋ | 135652/371472 [10:46:40<18:34:13, 3.53it/s] 37%|███▋ | 135653/371472 [10:46:41<18:35:57, 3.52it/s] 37%|███▋ | 135654/371472 [10:46:41<18:01:06, 3.64it/s] 37%|███▋ | 135655/371472 [10:46:41<20:20:18, 3.22it/s] 37%|███▋ | 135656/371472 [10:46:42<19:44:22, 3.32it/s] 37%|███▋ | 135657/371472 [10:46:42<19:24:27, 3.38it/s] 37%|███▋ | 135658/371472 [10:46:42<22:17:15, 2.94it/s] 37%|███▋ | 135659/371472 [10:46:43<20:48:38, 3.15it/s] 37%|███▋ | 135660/371472 [10:46:43<19:58:15, 3.28it/s] {'loss': 3.0809, 'learning_rate': 6.716315800814497e-07, 'epoch': 5.84} + 37%|███▋ | 135660/371472 [10:46:43<19:58:15, 3.28it/s] 37%|███▋ | 135661/371472 [10:46:43<18:52:44, 3.47it/s] 37%|███▋ | 135662/371472 [10:46:43<18:49:29, 3.48it/s] 37%|███▋ | 135663/371472 [10:46:44<18:22:14, 3.57it/s] 37%|███▋ | 135664/371472 [10:46:44<18:23:37, 3.56it/s] 37%|███▋ | 135665/371472 [10:46:44<17:53:41, 3.66it/s] 37%|███▋ | 135666/371472 [10:46:44<18:14:49, 3.59it/s] 37%|███▋ | 135667/371472 [10:46:45<17:20:49, 3.78it/s] 37%|███▋ | 135668/371472 [10:46:45<17:16:17, 3.79it/s] 37%|███▋ | 135669/371472 [10:46:45<16:51:33, 3.89it/s] 37%|███▋ | 135670/371472 [10:46:45<17:28:35, 3.75it/s] 37%|███▋ | 135671/371472 [10:46:46<18:25:29, 3.56it/s] 37%|███▋ | 135672/371472 [10:46:46<18:48:20, 3.48it/s] 37%|███▋ | 135673/371472 [10:46:46<19:24:35, 3.37it/s] 37%|███▋ | 135674/371472 [10:46:47<19:17:48, 3.39it/s] 37%|███▋ | 135675/371472 [10:46:47<18:28:07, 3.55it/s] 37%|███▋ | 135676/371472 [10:46:47<18:19:26, 3.57it/s] 37%|███▋ | 135677/371472 [10:46:47<17:52:08, 3.67it/s] 37%|███▋ | 135678/371472 [10:46:48<17:54:59, 3.66it/s] 37%|███▋ | 135679/371472 [10:46:48<17:58:09, 3.64it/s] 37%|███▋ | 135680/371472 [10:46:48<18:28:56, 3.54it/s] {'loss': 3.0137, 'learning_rate': 6.71583098105971e-07, 'epoch': 5.84} + 37%|███▋ | 135680/371472 [10:46:48<18:28:56, 3.54it/s] 37%|███▋ | 135681/371472 [10:46:49<17:49:58, 3.67it/s] 37%|███▋ | 135682/371472 [10:46:49<18:49:38, 3.48it/s] 37%|███▋ | 135683/371472 [10:46:49<19:28:06, 3.36it/s] 37%|███▋ | 135684/371472 [10:46:50<20:53:09, 3.14it/s] 37%|███▋ | 135685/371472 [10:46:50<20:42:47, 3.16it/s] 37%|███▋ | 135686/371472 [10:46:50<20:22:25, 3.21it/s] 37%|███▋ | 135687/371472 [10:46:50<20:25:33, 3.21it/s] 37%|███▋ | 135688/371472 [10:46:51<20:02:19, 3.27it/s] 37%|███▋ | 135689/371472 [10:46:51<19:30:43, 3.36it/s] 37%|███▋ | 135690/371472 [10:46:51<18:59:49, 3.45it/s] 37%|███▋ | 135691/371472 [10:46:52<18:07:36, 3.61it/s] 37%|███▋ | 135692/371472 [10:46:52<18:23:06, 3.56it/s] 37%|███▋ | 135693/371472 [10:46:52<18:25:05, 3.56it/s] 37%|███▋ | 135694/371472 [10:46:52<18:03:07, 3.63it/s] 37%|███▋ | 135695/371472 [10:46:53<17:40:10, 3.71it/s] 37%|███▋ | 135696/371472 [10:46:53<17:24:49, 3.76it/s] 37%|███▋ | 135697/371472 [10:46:53<17:21:16, 3.77it/s] 37%|███▋ | 135698/371472 [10:46:53<17:35:47, 3.72it/s] 37%|███▋ | 135699/371472 [10:46:54<18:05:25, 3.62it/s] 37%|███▋ | 135700/371472 [10:46:54<18:15:44, 3.59it/s] {'loss': 3.0158, 'learning_rate': 6.715346161304918e-07, 'epoch': 5.84} + 37%|███▋ | 135700/371472 [10:46:54<18:15:44, 3.59it/s] 37%|███▋ | 135701/371472 [10:46:54<17:40:24, 3.71it/s] 37%|███▋ | 135702/371472 [10:46:55<17:11:14, 3.81it/s] 37%|███▋ | 135703/371472 [10:46:55<17:41:11, 3.70it/s] 37%|███▋ | 135704/371472 [10:46:55<17:52:24, 3.66it/s] 37%|███▋ | 135705/371472 [10:46:55<18:08:29, 3.61it/s] 37%|███▋ | 135706/371472 [10:46:56<18:37:11, 3.52it/s] 37%|███▋ | 135707/371472 [10:46:56<21:00:14, 3.12it/s] 37%|███▋ | 135708/371472 [10:46:56<19:49:43, 3.30it/s] 37%|███▋ | 135709/371472 [10:46:57<19:26:10, 3.37it/s] 37%|███▋ | 135710/371472 [10:46:57<18:40:49, 3.51it/s] 37%|███▋ | 135711/371472 [10:46:57<18:31:31, 3.54it/s] 37%|███▋ | 135712/371472 [10:46:57<19:00:48, 3.44it/s] 37%|███▋ | 135713/371472 [10:46:58<18:48:49, 3.48it/s] 37%|███▋ | 135714/371472 [10:46:58<19:11:24, 3.41it/s] 37%|███▋ | 135715/371472 [10:46:58<18:59:49, 3.45it/s] 37%|███▋ | 135716/371472 [10:46:59<18:19:45, 3.57it/s] 37%|███▋ | 135717/371472 [10:46:59<17:51:21, 3.67it/s] 37%|███▋ | 135718/371472 [10:46:59<17:51:56, 3.67it/s] 37%|███▋ | 135719/371472 [10:46:59<17:32:53, 3.73it/s] 37%|███▋ | 135720/371472 [10:47:00<17:12:06, 3.81it/s] {'loss': 3.1452, 'learning_rate': 6.71486134155013e-07, 'epoch': 5.85} + 37%|███▋ | 135720/371472 [10:47:00<17:12:06, 3.81it/s] 37%|███▋ | 135721/371472 [10:47:00<18:04:17, 3.62it/s] 37%|███▋ | 135722/371472 [10:47:00<18:53:07, 3.47it/s] 37%|███▋ | 135723/371472 [10:47:01<19:55:58, 3.29it/s] 37%|███▋ | 135724/371472 [10:47:01<20:24:52, 3.21it/s] 37%|███▋ | 135725/371472 [10:47:01<20:09:43, 3.25it/s] 37%|███▋ | 135726/371472 [10:47:01<19:11:34, 3.41it/s] 37%|███▋ | 135727/371472 [10:47:02<18:47:31, 3.48it/s] 37%|███▋ | 135728/371472 [10:47:02<18:11:31, 3.60it/s] 37%|███▋ | 135729/371472 [10:47:02<17:44:05, 3.69it/s] 37%|███▋ | 135730/371472 [10:47:03<18:08:38, 3.61it/s] 37%|███▋ | 135731/371472 [10:47:03<19:02:46, 3.44it/s] 37%|███▋ | 135732/371472 [10:47:03<21:07:37, 3.10it/s] 37%|███▋ | 135733/371472 [10:47:04<21:07:43, 3.10it/s] 37%|███▋ | 135734/371472 [10:47:04<19:49:48, 3.30it/s] 37%|███▋ | 135735/371472 [10:47:04<19:21:32, 3.38it/s] 37%|███▋ | 135736/371472 [10:47:04<18:37:54, 3.51it/s] 37%|███▋ | 135737/371472 [10:47:05<18:02:30, 3.63it/s] 37%|███▋ | 135738/371472 [10:47:05<22:34:03, 2.90it/s] 37%|███▋ | 135739/371472 [10:47:05<20:28:06, 3.20it/s] 37%|███▋ | 135740/371472 [10:47:06<20:43:33, 3.16it/s] {'loss': 3.0691, 'learning_rate': 6.714376521795342e-07, 'epoch': 5.85} + 37%|███▋ | 135740/371472 [10:47:06<20:43:33, 3.16it/s] 37%|███▋ | 135741/371472 [10:47:06<21:20:41, 3.07it/s] 37%|███▋ | 135742/371472 [10:47:06<22:52:10, 2.86it/s] 37%|███▋ | 135743/371472 [10:47:07<21:31:24, 3.04it/s] 37%|███▋ | 135744/371472 [10:47:07<20:02:07, 3.27it/s] 37%|███▋ | 135745/371472 [10:47:07<20:19:13, 3.22it/s] 37%|███▋ | 135746/371472 [10:47:08<19:02:31, 3.44it/s] 37%|███▋ | 135747/371472 [10:47:08<18:29:07, 3.54it/s] 37%|███▋ | 135748/371472 [10:47:08<17:51:39, 3.67it/s] 37%|███▋ | 135749/371472 [10:47:08<18:15:06, 3.59it/s] 37%|███▋ | 135750/371472 [10:47:09<17:51:41, 3.67it/s] 37%|███▋ | 135751/371472 [10:47:09<17:31:51, 3.73it/s] 37%|███▋ | 135752/371472 [10:47:09<18:19:38, 3.57it/s] 37%|███▋ | 135753/371472 [10:47:09<18:22:27, 3.56it/s] 37%|███▋ | 135754/371472 [10:47:10<19:06:28, 3.43it/s] 37%|███▋ | 135755/371472 [10:47:10<18:16:27, 3.58it/s] 37%|███▋ | 135756/371472 [10:47:10<17:55:31, 3.65it/s] 37%|███▋ | 135757/371472 [10:47:11<18:01:04, 3.63it/s] 37%|███▋ | 135758/371472 [10:47:11<20:21:13, 3.22it/s] 37%|███▋ | 135759/371472 [10:47:11<19:48:02, 3.31it/s] 37%|███▋ | 135760/371472 [10:47:12<20:35:19, 3.18it/s] {'loss': 3.2708, 'learning_rate': 6.713891702040553e-07, 'epoch': 5.85} + 37%|███▋ | 135760/371472 [10:47:12<20:35:19, 3.18it/s] 37%|███▋ | 135761/371472 [10:47:12<19:49:43, 3.30it/s] 37%|███▋ | 135762/371472 [10:47:12<18:27:11, 3.55it/s] 37%|███▋ | 135763/371472 [10:47:12<18:56:30, 3.46it/s] 37%|███▋ | 135764/371472 [10:47:13<18:31:52, 3.53it/s] 37%|███▋ | 135765/371472 [10:47:13<19:16:40, 3.40it/s] 37%|███▋ | 135766/371472 [10:47:13<19:18:38, 3.39it/s] 37%|███▋ | 135767/371472 [10:47:14<19:16:41, 3.40it/s] 37%|███▋ | 135768/371472 [10:47:14<18:45:48, 3.49it/s] 37%|███▋ | 135769/371472 [10:47:14<18:26:36, 3.55it/s] 37%|███▋ | 135770/371472 [10:47:14<18:42:19, 3.50it/s] 37%|███▋ | 135771/371472 [10:47:15<19:11:59, 3.41it/s] 37%|███▋ | 135772/371472 [10:47:15<19:11:43, 3.41it/s] 37%|███▋ | 135773/371472 [10:47:15<18:23:29, 3.56it/s] 37%|███▋ | 135774/371472 [10:47:16<17:52:24, 3.66it/s] 37%|███▋ | 135775/371472 [10:47:16<17:59:01, 3.64it/s] 37%|███▋ | 135776/371472 [10:47:16<17:37:24, 3.72it/s] 37%|███▋ | 135777/371472 [10:47:16<18:38:39, 3.51it/s] 37%|███▋ | 135778/371472 [10:47:17<17:48:53, 3.68it/s] 37%|███▋ | 135779/371472 [10:47:17<18:44:17, 3.49it/s] 37%|███▋ | 135780/371472 [10:47:17<18:06:03, 3.62it/s] {'loss': 3.1081, 'learning_rate': 6.713406882285762e-07, 'epoch': 5.85} + 37%|███▋ | 135780/371472 [10:47:17<18:06:03, 3.62it/s] 37%|███▋ | 135781/371472 [10:47:18<18:20:43, 3.57it/s] 37%|███▋ | 135782/371472 [10:47:18<18:37:02, 3.52it/s] 37%|███▋ | 135783/371472 [10:47:18<18:18:22, 3.58it/s] 37%|███▋ | 135784/371472 [10:47:18<17:55:16, 3.65it/s] 37%|███▋ | 135785/371472 [10:47:19<19:54:31, 3.29it/s] 37%|███▋ | 135786/371472 [10:47:19<19:43:06, 3.32it/s] 37%|███▋ | 135787/371472 [10:47:19<18:34:07, 3.53it/s] 37%|███▋ | 135788/371472 [10:47:20<19:01:32, 3.44it/s] 37%|███▋ | 135789/371472 [10:47:20<20:01:47, 3.27it/s] 37%|██��▋ | 135790/371472 [10:47:20<19:05:10, 3.43it/s] 37%|███▋ | 135791/371472 [10:47:20<18:19:47, 3.57it/s] 37%|███▋ | 135792/371472 [10:47:21<18:26:34, 3.55it/s] 37%|███▋ | 135793/371472 [10:47:21<21:23:42, 3.06it/s] 37%|███▋ | 135794/371472 [10:47:21<21:17:07, 3.08it/s] 37%|███▋ | 135795/371472 [10:47:22<19:47:59, 3.31it/s] 37%|███▋ | 135796/371472 [10:47:22<19:21:43, 3.38it/s] 37%|███▋ | 135797/371472 [10:47:22<19:46:47, 3.31it/s] 37%|███▋ | 135798/371472 [10:47:23<18:31:55, 3.53it/s] 37%|███▋ | 135799/371472 [10:47:23<18:22:40, 3.56it/s] 37%|███▋ | 135800/371472 [10:47:23<19:14:58, 3.40it/s] {'loss': 3.166, 'learning_rate': 6.712922062530974e-07, 'epoch': 5.85} + 37%|███▋ | 135800/371472 [10:47:23<19:14:58, 3.40it/s] 37%|███▋ | 135801/371472 [10:47:23<20:40:08, 3.17it/s] 37%|███▋ | 135802/371472 [10:47:24<19:59:38, 3.27it/s] 37%|███▋ | 135803/371472 [10:47:24<19:11:40, 3.41it/s] 37%|███▋ | 135804/371472 [10:47:24<18:04:07, 3.62it/s] 37%|███▋ | 135805/371472 [10:47:25<17:27:30, 3.75it/s] 37%|███▋ | 135806/371472 [10:47:25<18:46:21, 3.49it/s] 37%|███▋ | 135807/371472 [10:47:25<19:00:48, 3.44it/s] 37%|███▋ | 135808/371472 [10:47:25<19:12:24, 3.41it/s] 37%|███▋ | 135809/371472 [10:47:26<18:45:41, 3.49it/s] 37%|███▋ | 135810/371472 [10:47:26<18:55:20, 3.46it/s] 37%|███▋ | 135811/371472 [10:47:26<18:14:24, 3.59it/s] 37%|███▋ | 135812/371472 [10:47:27<18:10:29, 3.60it/s] 37%|███▋ | 135813/371472 [10:47:27<17:38:04, 3.71it/s] 37%|███▋ | 135814/371472 [10:47:27<17:41:57, 3.70it/s] 37%|███▋ | 135815/371472 [10:47:27<18:14:43, 3.59it/s] 37%|███▋ | 135816/371472 [10:47:28<17:16:48, 3.79it/s] 37%|███▋ | 135817/371472 [10:47:28<16:53:31, 3.88it/s] 37%|███▋ | 135818/371472 [10:47:28<18:04:52, 3.62it/s] 37%|███▋ | 135819/371472 [10:47:28<17:32:38, 3.73it/s] 37%|███▋ | 135820/371472 [10:47:29<17:20:23, 3.78it/s] {'loss': 3.1383, 'learning_rate': 6.712437242776186e-07, 'epoch': 5.85} + 37%|███▋ | 135820/371472 [10:47:29<17:20:23, 3.78it/s] 37%|███▋ | 135821/371472 [10:47:29<17:32:08, 3.73it/s] 37%|███▋ | 135822/371472 [10:47:29<17:43:34, 3.69it/s] 37%|███▋ | 135823/371472 [10:47:29<16:59:29, 3.85it/s] 37%|███▋ | 135824/371472 [10:47:30<17:33:29, 3.73it/s] 37%|███▋ | 135825/371472 [10:47:30<17:15:02, 3.79it/s] 37%|███▋ | 135826/371472 [10:47:30<17:02:37, 3.84it/s] 37%|███▋ | 135827/371472 [10:47:31<17:38:10, 3.71it/s] 37%|███▋ | 135828/371472 [10:47:31<17:38:34, 3.71it/s] 37%|███▋ | 135829/371472 [10:47:31<17:16:50, 3.79it/s] 37%|███▋ | 135830/371472 [10:47:31<16:56:11, 3.86it/s] 37%|███▋ | 135831/371472 [10:47:32<17:07:45, 3.82it/s] 37%|███▋ | 135832/371472 [10:47:32<17:34:00, 3.73it/s] 37%|███▋ | 135833/371472 [10:47:32<17:54:25, 3.66it/s] 37%|███▋ | 135834/371472 [10:47:32<17:29:53, 3.74it/s] 37%|███▋ | 135835/371472 [10:47:33<17:29:14, 3.74it/s] 37%|███▋ | 135836/371472 [10:47:33<17:41:01, 3.70it/s] 37%|███▋ | 135837/371472 [10:47:33<18:33:08, 3.53it/s] 37%|███▋ | 135838/371472 [10:47:34<19:46:59, 3.31it/s] 37%|███▋ | 135839/371472 [10:47:34<19:12:46, 3.41it/s] 37%|███▋ | 135840/371472 [10:47:34<19:17:11, 3.39it/s] {'loss': 3.2279, 'learning_rate': 6.711952423021396e-07, 'epoch': 5.85} + 37%|███▋ | 135840/371472 [10:47:34<19:17:11, 3.39it/s] 37%|███▋ | 135841/371472 [10:47:35<20:46:27, 3.15it/s] 37%|███▋ | 135842/371472 [10:47:35<19:12:17, 3.41it/s] 37%|███▋ | 135843/371472 [10:47:35<18:02:00, 3.63it/s] 37%|███▋ | 135844/371472 [10:47:35<17:53:58, 3.66it/s] 37%|███▋ | 135845/371472 [10:47:36<17:20:47, 3.77it/s] 37%|███▋ | 135846/371472 [10:47:36<16:57:19, 3.86it/s] 37%|███▋ | 135847/371472 [10:47:36<16:57:32, 3.86it/s] 37%|███▋ | 135848/371472 [10:47:36<17:52:28, 3.66it/s] 37%|███▋ | 135849/371472 [10:47:37<17:57:22, 3.64it/s] 37%|███▋ | 135850/371472 [10:47:37<17:43:37, 3.69it/s] 37%|███▋ | 135851/371472 [10:47:37<17:46:01, 3.68it/s] 37%|███▋ | 135852/371472 [10:47:37<17:14:21, 3.80it/s] 37%|███▋ | 135853/371472 [10:47:38<16:51:14, 3.88it/s] 37%|███▋ | 135854/371472 [10:47:38<17:10:54, 3.81it/s] 37%|███▋ | 135855/371472 [10:47:38<17:32:18, 3.73it/s] 37%|███▋ | 135856/371472 [10:47:39<20:52:01, 3.14it/s] 37%|███▋ | 135857/371472 [10:47:39<20:10:00, 3.25it/s] 37%|███▋ | 135858/371472 [10:47:39<19:00:36, 3.44it/s] 37%|███▋ | 135859/371472 [10:47:39<19:25:08, 3.37it/s] 37%|███▋ | 135860/371472 [10:47:40<20:24:58, 3.21it/s] {'loss': 3.009, 'learning_rate': 6.711467603266607e-07, 'epoch': 5.85} + 37%|███▋ | 135860/371472 [10:47:40<20:24:58, 3.21it/s] 37%|███▋ | 135861/371472 [10:47:40<20:51:22, 3.14it/s] 37%|███▋ | 135862/371472 [10:47:40<19:14:50, 3.40it/s] 37%|███▋ | 135863/371472 [10:47:41<18:57:54, 3.45it/s] 37%|███▋ | 135864/371472 [10:47:41<19:37:18, 3.34it/s] 37%|███▋ | 135865/371472 [10:47:41<19:10:06, 3.41it/s] 37%|███▋ | 135866/371472 [10:47:42<19:07:59, 3.42it/s] 37%|███▋ | 135867/371472 [10:47:42<18:54:45, 3.46it/s] 37%|███▋ | 135868/371472 [10:47:42<20:36:06, 3.18it/s] 37%|███▋ | 135869/371472 [10:47:42<19:44:46, 3.31it/s] 37%|███▋ | 135870/371472 [10:47:43<18:43:04, 3.50it/s] 37%|███▋ | 135871/371472 [10:47:43<18:18:15, 3.58it/s] 37%|███▋ | 135872/371472 [10:47:43<18:00:47, 3.63it/s] 37%|███▋ | 135873/371472 [10:47:44<18:26:24, 3.55it/s] 37%|███▋ | 135874/371472 [10:47:44<18:57:34, 3.45it/s] 37%|███▋ | 135875/371472 [10:47:44<18:30:44, 3.54it/s] 37%|███▋ | 135876/371472 [10:47:44<18:32:49, 3.53it/s] 37%|███▋ | 135877/371472 [10:47:45<19:46:31, 3.31it/s] 37%|███▋ | 135878/371472 [10:47:45<19:00:01, 3.44it/s] 37%|███▋ | 135879/371472 [10:47:45<18:30:27, 3.54it/s] 37%|███▋ | 135880/371472 [10:47:46<17:44:58, 3.69it/s] {'loss': 3.1181, 'learning_rate': 6.710982783511819e-07, 'epoch': 5.85} + 37%|███▋ | 135880/371472 [10:47:46<17:44:58, 3.69it/s] 37%|███▋ | 135881/371472 [10:47:46<17:16:54, 3.79it/s] 37%|███▋ | 135882/371472 [10:47:46<17:50:44, 3.67it/s] 37%|███▋ | 135883/371472 [10:47:46<18:21:30, 3.56it/s] 37%|███▋ | 135884/371472 [10:47:47<17:43:54, 3.69it/s] 37%|███▋ | 135885/371472 [10:47:47<17:49:54, 3.67it/s] 37%|███▋ | 135886/371472 [10:47:47<17:41:12, 3.70it/s] 37%|███▋ | 135887/371472 [10:47:47<17:51:38, 3.66it/s] 37%|███▋ | 135888/371472 [10:47:48<18:09:24, 3.60it/s] 37%|███▋ | 135889/371472 [10:47:48<18:19:13, 3.57it/s] 37%|███▋ | 135890/371472 [10:47:48<18:38:16, 3.51it/s] 37%|███▋ | 135891/371472 [10:47:49<17:58:08, 3.64it/s] 37%|███▋ | 135892/371472 [10:47:49<18:34:21, 3.52it/s] 37%|███▋ | 135893/371472 [10:47:49<17:56:28, 3.65it/s] 37%|███▋ | 135894/371472 [10:47:49<18:07:54, 3.61it/s] 37%|███▋ | 135895/371472 [10:47:50<17:56:35, 3.65it/s] 37%|███▋ | 135896/371472 [10:47:50<18:16:14, 3.58it/s] 37%|███▋ | 135897/371472 [10:47:50<17:53:04, 3.66it/s] 37%|███▋ | 135898/371472 [10:47:51<18:09:16, 3.60it/s] 37%|███▋ | 135899/371472 [10:47:51<19:00:18, 3.44it/s] 37%|███▋ | 135900/371472 [10:47:51<18:40:33, 3.50it/s] {'loss': 3.1606, 'learning_rate': 6.71049796375703e-07, 'epoch': 5.85} + 37%|███▋ | 135900/371472 [10:47:51<18:40:33, 3.50it/s] 37%|███▋ | 135901/371472 [10:47:51<17:44:27, 3.69it/s] 37%|███▋ | 135902/371472 [10:47:52<19:05:56, 3.43it/s] 37%|███▋ | 135903/371472 [10:47:52<18:24:40, 3.55it/s] 37%|███▋ | 135904/371472 [10:47:52<17:52:18, 3.66it/s] 37%|███▋ | 135905/371472 [10:47:52<17:23:45, 3.76it/s] 37%|███▋ | 135906/371472 [10:47:53<17:52:46, 3.66it/s] 37%|███▋ | 135907/371472 [10:47:53<18:00:44, 3.63it/s] 37%|███▋ | 135908/371472 [10:47:53<17:38:36, 3.71it/s] 37%|███▋ | 135909/371472 [10:47:54<18:30:52, 3.53it/s] 37%|███▋ | 135910/371472 [10:47:54<17:49:38, 3.67it/s] 37%|███▋ | 135911/371472 [10:47:54<18:08:36, 3.61it/s] 37%|███▋ | 135912/371472 [10:47:54<19:19:54, 3.38it/s] 37%|███▋ | 135913/371472 [10:47:55<19:08:55, 3.42it/s] 37%|███▋ | 135914/371472 [10:47:55<18:51:32, 3.47it/s] 37%|███▋ | 135915/371472 [10:47:55<19:07:46, 3.42it/s] 37%|███▋ | 135916/371472 [10:47:56<19:23:12, 3.38it/s] 37%|███▋ | 135917/371472 [10:47:56<19:18:17, 3.39it/s] 37%|███▋ | 135918/371472 [10:47:56<21:07:19, 3.10it/s] 37%|███▋ | 135919/371472 [10:47:57<19:52:28, 3.29it/s] 37%|███▋ | 135920/371472 [10:47:57<18:50:51, 3.47it/s] {'loss': 3.0974, 'learning_rate': 6.71001314400224e-07, 'epoch': 5.85} + 37%|███▋ | 135920/371472 [10:47:57<18:50:51, 3.47it/s] 37%|███▋ | 135921/371472 [10:47:57<18:34:23, 3.52it/s] 37%|███▋ | 135922/371472 [10:47:57<18:19:25, 3.57it/s] 37%|███▋ | 135923/371472 [10:47:58<18:30:09, 3.54it/s] 37%|███▋ | 135924/371472 [10:47:58<18:46:40, 3.48it/s] 37%|███▋ | 135925/371472 [10:47:58<18:27:38, 3.54it/s] 37%|███▋ | 135926/371472 [10:47:59<18:16:16, 3.58it/s] 37%|███▋ | 135927/371472 [10:47:59<18:07:12, 3.61it/s] 37%|███▋ | 135928/371472 [10:47:59<17:16:45, 3.79it/s] 37%|███▋ | 135929/371472 [10:47:59<19:40:39, 3.33it/s] 37%|███▋ | 135930/371472 [10:48:00<20:07:04, 3.25it/s] 37%|███▋ | 135931/371472 [10:48:00<21:57:56, 2.98it/s] 37%|███▋ | 135932/371472 [10:48:00<20:32:54, 3.18it/s] 37%|███▋ | 135933/371472 [10:48:01<19:10:21, 3.41it/s] 37%|███▋ | 135934/371472 [10:48:01<19:09:30, 3.42it/s] 37%|███▋ | 135935/371472 [10:48:01<18:23:28, 3.56it/s] 37%|███▋ | 135936/371472 [10:48:01<17:54:08, 3.65it/s] 37%|███▋ | 135937/371472 [10:48:02<18:26:35, 3.55it/s] 37%|███▋ | 135938/371472 [10:48:02<17:21:43, 3.77it/s] 37%|███▋ | 135939/371472 [10:48:02<17:11:13, 3.81it/s] 37%|███▋ | 135940/371472 [10:48:02<17:36:40, 3.72it/s] {'loss': 3.0554, 'learning_rate': 6.709528324247451e-07, 'epoch': 5.86} + 37%|███▋ | 135940/371472 [10:48:03<17:36:40, 3.72it/s] 37%|███▋ | 135941/371472 [10:48:03<17:46:31, 3.68it/s] 37%|███▋ | 135942/371472 [10:48:03<17:38:43, 3.71it/s] 37%|███▋ | 135943/371472 [10:48:03<17:28:06, 3.75it/s] 37%|███▋ | 135944/371472 [10:48:04<18:37:03, 3.51it/s] 37%|███▋ | 135945/371472 [10:48:04<17:51:50, 3.66it/s] 37%|███▋ | 135946/371472 [10:48:04<17:34:12, 3.72it/s] 37%|███▋ | 135947/371472 [10:48:04<19:05:36, 3.43it/s] 37%|███▋ | 135948/371472 [10:48:05<18:28:42, 3.54it/s] 37%|███▋ | 135949/371472 [10:48:05<18:32:53, 3.53it/s] 37%|███▋ | 135950/371472 [10:48:05<18:53:16, 3.46it/s] 37%|███▋ | 135951/371472 [10:48:06<19:26:29, 3.37it/s] 37%|███▋ | 135952/371472 [10:48:06<19:28:51, 3.36it/s] 37%|███▋ | 135953/371472 [10:48:06<18:26:13, 3.55it/s] 37%|███▋ | 135954/371472 [10:48:06<17:40:42, 3.70it/s] 37%|███▋ | 135955/371472 [10:48:07<17:29:20, 3.74it/s] 37%|███▋ | 135956/371472 [10:48:07<18:30:21, 3.54it/s] 37%|███▋ | 135957/371472 [10:48:07<18:20:11, 3.57it/s] 37%|███▋ | 135958/371472 [10:48:08<18:32:19, 3.53it/s] 37%|███▋ | 135959/371472 [10:48:08<18:37:49, 3.51it/s] 37%|███▋ | 135960/371472 [10:48:08<18:08:57, 3.60it/s] {'loss': 3.292, 'learning_rate': 6.709043504492663e-07, 'epoch': 5.86} + 37%|███▋ | 135960/371472 [10:48:08<18:08:57, 3.60it/s] 37%|███▋ | 135961/371472 [10:48:08<18:31:28, 3.53it/s] 37%|███▋ | 135962/371472 [10:48:09<18:59:37, 3.44it/s] 37%|███▋ | 135963/371472 [10:48:09<18:02:14, 3.63it/s] 37%|███▋ | 135964/371472 [10:48:09<17:38:51, 3.71it/s] 37%|███▋ | 135965/371472 [10:48:10<19:18:56, 3.39it/s] 37%|███▋ | 135966/371472 [10:48:10<20:25:35, 3.20it/s] 37%|███▋ | 135967/371472 [10:48:10<20:17:06, 3.22it/s] 37%|███▋ | 135968/371472 [10:48:10<19:18:33, 3.39it/s] 37%|███▋ | 135969/371472 [10:48:11<18:50:54, 3.47it/s] 37%|███▋ | 135970/371472 [10:48:11<18:01:59, 3.63it/s] 37%|███▋ | 135971/371472 [10:48:11<18:08:19, 3.61it/s] 37%|███▋ | 135972/371472 [10:48:12<18:53:58, 3.46it/s] 37%|███▋ | 135973/371472 [10:48:12<18:25:58, 3.55it/s] 37%|███▋ | 135974/371472 [10:48:12<18:44:47, 3.49it/s] 37%|███▋ | 135975/371472 [10:48:12<18:49:58, 3.47it/s] 37%|███▋ | 135976/371472 [10:48:13<18:50:26, 3.47it/s] 37%|███▋ | 135977/371472 [10:48:13<18:44:20, 3.49it/s] 37%|███▋ | 135978/371472 [10:48:13<18:16:33, 3.58it/s] 37%|███▋ | 135979/371472 [10:48:14<18:35:09, 3.52it/s] 37%|███▋ | 135980/371472 [10:48:14<17:32:25, 3.73it/s] {'loss': 3.1406, 'learning_rate': 6.708558684737875e-07, 'epoch': 5.86} + 37%|███▋ | 135980/371472 [10:48:14<17:32:25, 3.73it/s] 37%|███▋ | 135981/371472 [10:48:14<18:10:09, 3.60it/s] 37%|███▋ | 135982/371472 [10:48:14<18:07:42, 3.61it/s] 37%|███▋ | 135983/371472 [10:48:15<18:52:03, 3.47it/s] 37%|███▋ | 135984/371472 [10:48:15<18:44:21, 3.49it/s] 37%|███▋ | 135985/371472 [10:48:15<18:18:08, 3.57it/s] 37%|███▋ | 135986/371472 [10:48:16<17:53:36, 3.66it/s] 37%|███▋ | 135987/371472 [10:48:16<21:45:06, 3.01it/s] 37%|███▋ | 135988/371472 [10:48:16<22:06:37, 2.96it/s] 37%|███▋ | 135989/371472 [10:48:17<20:36:00, 3.18it/s] 37%|███▋ | 135990/371472 [10:48:17<19:47:19, 3.31it/s] 37%|███▋ | 135991/371472 [10:48:17<19:16:24, 3.39it/s] 37%|███▋ | 135992/371472 [10:48:17<18:39:45, 3.50it/s] 37%|███▋ | 135993/371472 [10:48:18<18:20:48, 3.57it/s] 37%|███▋ | 135994/371472 [10:48:18<18:32:33, 3.53it/s] 37%|███▋ | 135995/371472 [10:48:18<20:21:28, 3.21it/s] 37%|███▋ | 135996/371472 [10:48:19<20:59:26, 3.12it/s] 37%|███▋ | 135997/371472 [10:48:19<19:22:02, 3.38it/s] 37%|███▋ | 135998/371472 [10:48:19<18:17:51, 3.57it/s] 37%|███▋ | 135999/371472 [10:48:19<18:02:36, 3.63it/s] 37%|███▋ | 136000/371472 [10:48:20<18:48:39, 3.48it/s] {'loss': 3.2363, 'learning_rate': 6.708073864983085e-07, 'epoch': 5.86} + 37%|███▋ | 136000/371472 [10:48:20<18:48:39, 3.48it/s] 37%|███▋ | 136001/371472 [10:48:20<18:36:41, 3.51it/s] 37%|███▋ | 136002/371472 [10:48:20<17:51:46, 3.66it/s] 37%|███▋ | 136003/371472 [10:48:21<18:07:26, 3.61it/s] 37%|███▋ | 136004/371472 [10:48:21<17:56:52, 3.64it/s] 37%|███▋ | 136005/371472 [10:48:21<18:26:35, 3.55it/s] 37%|███▋ | 136006/371472 [10:48:21<17:55:40, 3.65it/s] 37%|███▋ | 136007/371472 [10:48:22<17:27:50, 3.75it/s] 37%|███▋ | 136008/371472 [10:48:22<17:30:55, 3.73it/s] 37%|███▋ | 136009/371472 [10:48:22<18:21:51, 3.56it/s] 37%|███▋ | 136010/371472 [10:48:22<18:11:04, 3.60it/s] 37%|███▋ | 136011/371472 [10:48:23<17:56:00, 3.65it/s] 37%|███▋ | 136012/371472 [10:48:23<18:42:48, 3.50it/s] 37%|███▋ | 136013/371472 [10:48:23<18:54:16, 3.46it/s] 37%|███▋ | 136014/371472 [10:48:24<18:44:31, 3.49it/s] 37%|███▋ | 136015/371472 [10:48:24<18:56:12, 3.45it/s] 37%|███▋ | 136016/371472 [10:48:24<17:57:44, 3.64it/s] 37%|███▋ | 136017/371472 [10:48:25<18:51:54, 3.47it/s] 37%|███▋ | 136018/371472 [10:48:25<18:13:49, 3.59it/s] 37%|███▋ | 136019/371472 [10:48:25<18:12:02, 3.59it/s] 37%|███▋ | 136020/371472 [10:48:25<17:56:32, 3.65it/s] {'loss': 3.164, 'learning_rate': 6.707589045228295e-07, 'epoch': 5.86} + 37%|███▋ | 136020/371472 [10:48:25<17:56:32, 3.65it/s] 37%|███▋ | 136021/371472 [10:48:26<18:26:34, 3.55it/s] 37%|███▋ | 136022/371472 [10:48:26<18:36:16, 3.52it/s] 37%|███▋ | 136023/371472 [10:48:26<18:10:12, 3.60it/s] 37%|███▋ | 136024/371472 [10:48:26<17:30:21, 3.74it/s] 37%|███▋ | 136025/371472 [10:48:27<17:49:16, 3.67it/s] 37%|███▋ | 136026/371472 [10:48:27<17:49:48, 3.67it/s] 37%|███▋ | 136027/371472 [10:48:27<17:31:47, 3.73it/s] 37%|███▋ | 136028/371472 [10:48:27<17:13:57, 3.80it/s] 37%|███▋ | 136029/371472 [10:48:28<17:41:12, 3.70it/s] 37%|███▋ | 136030/371472 [10:48:28<18:11:41, 3.59it/s] 37%|███▋ | 136031/371472 [10:48:28<18:13:35, 3.59it/s] 37%|███▋ | 136032/371472 [10:48:29<18:34:52, 3.52it/s] 37%|███▋ | 136033/371472 [10:48:29<17:37:43, 3.71it/s] 37%|███▋ | 136034/371472 [10:48:29<17:55:45, 3.65it/s] 37%|███▋ | 136035/371472 [10:48:29<18:04:29, 3.62it/s] 37%|███▋ | 136036/371472 [10:48:30<18:09:06, 3.60it/s] 37%|███▋ | 136037/371472 [10:48:30<18:00:26, 3.63it/s] 37%|███▋ | 136038/371472 [10:48:30<18:21:06, 3.56it/s] 37%|███▋ | 136039/371472 [10:48:31<18:36:33, 3.51it/s] 37%|███▋ | 136040/371472 [10:48:31<18:31:45, 3.53it/s] {'loss': 3.2286, 'learning_rate': 6.707104225473507e-07, 'epoch': 5.86} + 37%|███▋ | 136040/371472 [10:48:31<18:31:45, 3.53it/s] 37%|███▋ | 136041/371472 [10:48:31<17:59:02, 3.64it/s] 37%|███▋ | 136042/371472 [10:48:31<17:50:26, 3.67it/s] 37%|███▋ | 136043/371472 [10:48:32<17:09:55, 3.81it/s] 37%|███▋ | 136044/371472 [10:48:32<17:11:49, 3.80it/s] 37%|███▋ | 136045/371472 [10:48:32<18:54:10, 3.46it/s] 37%|███▋ | 136046/371472 [10:48:33<19:02:46, 3.43it/s] 37%|███▋ | 136047/371472 [10:48:33<20:53:08, 3.13it/s] 37%|███▋ | 136048/371472 [10:48:33<19:51:23, 3.29it/s] 37%|███▋ | 136049/371472 [10:48:33<19:39:11, 3.33it/s] 37%|███▋ | 136050/371472 [10:48:34<19:09:05, 3.41it/s] 37%|███▋ | 136051/371472 [10:48:34<19:31:08, 3.35it/s] 37%|███▋ | 136052/371472 [10:48:34<18:34:26, 3.52it/s] 37%|███▋ | 136053/371472 [10:48:35<18:53:37, 3.46it/s] 37%|███▋ | 136054/371472 [10:48:35<18:52:39, 3.46it/s] 37%|███▋ | 136055/371472 [10:48:35<20:00:25, 3.27it/s] 37%|███▋ | 136056/371472 [10:48:35<18:37:22, 3.51it/s] 37%|███▋ | 136057/371472 [10:48:36<18:03:56, 3.62it/s] 37%|███▋ | 136058/371472 [10:48:36<17:39:21, 3.70it/s] 37%|███▋ | 136059/371472 [10:48:36<17:06:53, 3.82it/s] 37%|███▋ | 136060/371472 [10:48:37<17:25:19, 3.75it/s] {'loss': 3.2019, 'learning_rate': 6.706619405718719e-07, 'epoch': 5.86} + 37%|███▋ | 136060/371472 [10:48:37<17:25:19, 3.75it/s] 37%|███▋ | 136061/371472 [10:48:37<17:06:49, 3.82it/s] 37%|███▋ | 136062/371472 [10:48:37<16:58:58, 3.85it/s] 37%|███▋ | 136063/371472 [10:48:37<16:57:03, 3.86it/s] 37%|███▋ | 136064/371472 [10:48:38<16:49:32, 3.89it/s] 37%|███▋ | 136065/371472 [10:48:38<18:55:43, 3.45it/s] 37%|███▋ | 136066/371472 [10:48:38<19:04:39, 3.43it/s] 37%|███▋ | 136067/371472 [10:48:38<19:25:19, 3.37it/s] 37%|███▋ | 136068/371472 [10:48:39<18:17:28, 3.57it/s] 37%|███▋ | 136069/371472 [10:48:39<20:05:46, 3.25it/s] 37%|███▋ | 136070/371472 [10:48:39<19:30:20, 3.35it/s] 37%|███▋ | 136071/371472 [10:48:40<19:25:48, 3.37it/s] 37%|███▋ | 136072/371472 [10:48:40<18:28:09, 3.54it/s] 37%|███▋ | 136073/371472 [10:48:40<18:50:17, 3.47it/s] 37%|███▋ | 136074/371472 [10:48:40<18:34:21, 3.52it/s] 37%|███▋ | 136075/371472 [10:48:41<17:44:05, 3.69it/s] 37%|███▋ | 136076/371472 [10:48:41<17:43:01, 3.69it/s] 37%|███▋ | 136077/371472 [10:48:41<18:12:34, 3.59it/s] 37%|███▋ | 136078/371472 [10:48:42<17:50:55, 3.66it/s] 37%|███▋ | 136079/371472 [10:48:42<17:39:42, 3.70it/s] 37%|███▋ | 136080/371472 [10:48:42<17:20:41, 3.77it/s] {'loss': 2.9319, 'learning_rate': 6.706134585963929e-07, 'epoch': 5.86} + 37%|███▋ | 136080/371472 [10:48:42<17:20:41, 3.77it/s] 37%|███▋ | 136081/371472 [10:48:42<18:06:58, 3.61it/s] 37%|███▋ | 136082/371472 [10:48:43<18:14:38, 3.58it/s] 37%|███▋ | 136083/371472 [10:48:43<18:36:11, 3.51it/s] 37%|███▋ | 136084/371472 [10:48:43<17:51:19, 3.66it/s] 37%|███▋ | 136085/371472 [10:48:43<17:27:29, 3.75it/s] 37%|███▋ | 136086/371472 [10:48:44<17:03:59, 3.83it/s] 37%|███▋ | 136087/371472 [10:48:44<17:22:41, 3.76it/s] 37%|███▋ | 136088/371472 [10:48:44<18:00:27, 3.63it/s] 37%|███▋ | 136089/371472 [10:48:45<19:16:57, 3.39it/s] 37%|███▋ | 136090/371472 [10:48:45<18:59:36, 3.44it/s] 37%|███▋ | 136091/371472 [10:48:45<18:21:30, 3.56it/s] 37%|███▋ | 136092/371472 [10:48:45<18:51:49, 3.47it/s] 37%|███▋ | 136093/371472 [10:48:46<18:54:44, 3.46it/s] 37%|███▋ | 136094/371472 [10:48:46<20:18:40, 3.22it/s] 37%|███▋ | 136095/371472 [10:48:46<19:20:46, 3.38it/s] 37%|███▋ | 136096/371472 [10:48:47<19:42:21, 3.32it/s] 37%|███▋ | 136097/371472 [10:48:47<18:27:24, 3.54it/s] 37%|███▋ | 136098/371472 [10:48:47<18:49:39, 3.47it/s] 37%|███▋ | 136099/371472 [10:48:48<18:33:45, 3.52it/s] 37%|███▋ | 136100/371472 [10:48:48<18:09:35, 3.60it/s] {'loss': 3.1988, 'learning_rate': 6.70564976620914e-07, 'epoch': 5.86} + 37%|███▋ | 136100/371472 [10:48:48<18:09:35, 3.60it/s] 37%|███▋ | 136101/371472 [10:48:48<18:44:15, 3.49it/s] 37%|███▋ | 136102/371472 [10:48:48<20:16:22, 3.23it/s] 37%|███▋ | 136103/371472 [10:48:49<21:52:15, 2.99it/s] 37%|███▋ | 136104/371472 [10:48:49<20:43:37, 3.15it/s] 37%|███▋ | 136105/371472 [10:48:49<19:16:13, 3.39it/s] 37%|███▋ | 136106/371472 [10:48:50<18:35:33, 3.52it/s] 37%|███▋ | 136107/371472 [10:48:50<19:57:32, 3.28it/s] 37%|███▋ | 136108/371472 [10:48:50<21:46:04, 3.00it/s] 37%|███▋ | 136109/371472 [10:48:51<20:46:40, 3.15it/s] 37%|███▋ | 136110/371472 [10:48:51<19:49:09, 3.30it/s] 37%|███▋ | 136111/371472 [10:48:51<18:47:31, 3.48it/s] 37%|███▋ | 136112/371472 [10:48:51<18:22:12, 3.56it/s] 37%|███▋ | 136113/371472 [10:48:52<18:03:57, 3.62it/s] 37%|███▋ | 136114/371472 [10:48:52<17:35:10, 3.72it/s] 37%|███▋ | 136115/371472 [10:48:52<17:58:52, 3.64it/s] 37%|███▋ | 136116/371472 [10:48:53<18:05:53, 3.61it/s] 37%|███▋ | 136117/371472 [10:48:53<17:44:37, 3.68it/s] 37%|███▋ | 136118/371472 [10:48:53<17:57:05, 3.64it/s] 37%|███▋ | 136119/371472 [10:48:53<18:22:47, 3.56it/s] 37%|███▋ | 136120/371472 [10:48:54<17:52:44, 3.66it/s] {'loss': 3.0192, 'learning_rate': 6.705164946454352e-07, 'epoch': 5.86} + 37%|███▋ | 136120/371472 [10:48:54<17:52:44, 3.66it/s] 37%|███▋ | 136121/371472 [10:48:54<17:46:37, 3.68it/s] 37%|███▋ | 136122/371472 [10:48:54<17:01:16, 3.84it/s] 37%|███▋ | 136123/371472 [10:48:54<18:09:54, 3.60it/s] 37%|███▋ | 136124/371472 [10:48:55<17:43:43, 3.69it/s] 37%|███▋ | 136125/371472 [10:48:55<17:41:03, 3.70it/s] 37%|███▋ | 136126/371472 [10:48:55<17:04:36, 3.83it/s] 37%|███▋ | 136127/371472 [10:48:55<16:49:49, 3.88it/s] 37%|███▋ | 136128/371472 [10:48:56<17:11:36, 3.80it/s] 37%|███▋ | 136129/371472 [10:48:56<17:22:56, 3.76it/s] 37%|███▋ | 136130/371472 [10:48:56<18:25:07, 3.55it/s] 37%|███▋ | 136131/371472 [10:48:57<18:08:14, 3.60it/s] 37%|███▋ | 136132/371472 [10:48:57<18:28:03, 3.54it/s] 37%|███▋ | 136133/371472 [10:48:57<18:25:40, 3.55it/s] 37%|███▋ | 136134/371472 [10:48:58<20:01:56, 3.26it/s] 37%|███▋ | 136135/371472 [10:48:58<20:40:22, 3.16it/s] 37%|███▋ | 136136/371472 [10:48:58<19:45:40, 3.31it/s] 37%|███▋ | 136137/371472 [10:48:58<18:54:23, 3.46it/s] 37%|███▋ | 136138/371472 [10:48:59<18:20:10, 3.57it/s] 37%|███▋ | 136139/371472 [10:48:59<17:50:55, 3.66it/s] 37%|███▋ | 136140/371472 [10:48:59<18:30:46, 3.53it/s] {'loss': 3.0822, 'learning_rate': 6.704680126699562e-07, 'epoch': 5.86} + 37%|███▋ | 136140/371472 [10:48:59<18:30:46, 3.53it/s] 37%|███▋ | 136141/371472 [10:48:59<18:08:24, 3.60it/s] 37%|███▋ | 136142/371472 [10:49:00<17:14:56, 3.79it/s] 37%|███▋ | 136143/371472 [10:49:00<17:21:32, 3.77it/s] 37%|███▋ | 136144/371472 [10:49:00<18:55:21, 3.45it/s] 37%|███▋ | 136145/371472 [10:49:01<18:15:15, 3.58it/s] 37%|███▋ | 136146/371472 [10:49:01<18:34:01, 3.52it/s] 37%|███▋ | 136147/371472 [10:49:01<18:53:30, 3.46it/s] 37%|███▋ | 136148/371472 [10:49:02<19:38:53, 3.33it/s] 37%|███▋ | 136149/371472 [10:49:02<18:48:59, 3.47it/s] 37%|███▋ | 136150/371472 [10:49:02<18:18:00, 3.57it/s] 37%|███▋ | 136151/371472 [10:49:02<18:51:38, 3.47it/s] 37%|███▋ | 136152/371472 [10:49:03<18:23:11, 3.56it/s] 37%|███▋ | 136153/371472 [10:49:03<17:47:19, 3.67it/s] 37%|███▋ | 136154/371472 [10:49:03<18:32:26, 3.53it/s] 37%|███▋ | 136155/371472 [10:49:03<18:36:34, 3.51it/s] 37%|███▋ | 136156/371472 [10:49:04<18:31:08, 3.53it/s] 37%|███▋ | 136157/371472 [10:49:04<18:40:29, 3.50it/s] 37%|███▋ | 136158/371472 [10:49:04<18:14:57, 3.58it/s] 37%|███▋ | 136159/371472 [10:49:05<18:58:07, 3.45it/s] 37%|███▋ | 136160/371472 [10:49:05<19:28:05, 3.36it/s] {'loss': 3.1441, 'learning_rate': 6.704195306944773e-07, 'epoch': 5.86} + 37%|███▋ | 136160/371472 [10:49:05<19:28:05, 3.36it/s] 37%|███▋ | 136161/371472 [10:49:05<18:40:53, 3.50it/s] 37%|███▋ | 136162/371472 [10:49:05<17:51:14, 3.66it/s] 37%|███▋ | 136163/371472 [10:49:06<17:42:46, 3.69it/s] 37%|███▋ | 136164/371472 [10:49:06<18:32:54, 3.52it/s] 37%|███▋ | 136165/371472 [10:49:06<19:13:01, 3.40it/s] 37%|███▋ | 136166/371472 [10:49:07<18:51:11, 3.47it/s] 37%|███▋ | 136167/371472 [10:49:07<18:37:59, 3.51it/s] 37%|███▋ | 136168/371472 [10:49:07<18:36:54, 3.51it/s] 37%|███▋ | 136169/371472 [10:49:07<17:59:44, 3.63it/s] 37%|███▋ | 136170/371472 [10:49:08<17:56:20, 3.64it/s] 37%|███▋ | 136171/371472 [10:49:08<19:55:58, 3.28it/s] 37%|███▋ | 136172/371472 [10:49:08<20:01:52, 3.26it/s] 37%|███�� | 136173/371472 [10:49:09<19:59:30, 3.27it/s] 37%|███▋ | 136174/371472 [10:49:09<18:43:05, 3.49it/s] 37%|███▋ | 136175/371472 [10:49:09<18:45:42, 3.48it/s] 37%|███▋ | 136176/371472 [10:49:09<18:00:20, 3.63it/s] 37%|███▋ | 136177/371472 [10:49:10<18:00:30, 3.63it/s] 37%|███▋ | 136178/371472 [10:49:10<17:30:10, 3.73it/s] 37%|███▋ | 136179/371472 [10:49:10<16:46:46, 3.90it/s] 37%|███▋ | 136180/371472 [10:49:11<18:42:50, 3.49it/s] {'loss': 3.1282, 'learning_rate': 6.703710487189984e-07, 'epoch': 5.87} + 37%|███▋ | 136180/371472 [10:49:11<18:42:50, 3.49it/s] 37%|███▋ | 136181/371472 [10:49:11<18:45:04, 3.49it/s] 37%|███▋ | 136182/371472 [10:49:11<19:02:12, 3.43it/s] 37%|███▋ | 136183/371472 [10:49:11<18:06:38, 3.61it/s] 37%|███▋ | 136184/371472 [10:49:12<18:14:23, 3.58it/s] 37%|███▋ | 136185/371472 [10:49:12<18:23:46, 3.55it/s] 37%|███▋ | 136186/371472 [10:49:12<19:04:10, 3.43it/s] 37%|███▋ | 136187/371472 [10:49:13<19:03:21, 3.43it/s] 37%|███▋ | 136188/371472 [10:49:13<19:38:38, 3.33it/s] 37%|███▋ | 136189/371472 [10:49:13<19:08:54, 3.41it/s] 37%|███▋ | 136190/371472 [10:49:13<19:01:34, 3.44it/s] 37%|███▋ | 136191/371472 [10:49:14<18:55:44, 3.45it/s] 37%|███▋ | 136192/371472 [10:49:14<18:16:38, 3.58it/s] 37%|███▋ | 136193/371472 [10:49:14<18:55:54, 3.45it/s] 37%|███▋ | 136194/371472 [10:49:15<18:23:13, 3.55it/s] 37%|███▋ | 136195/371472 [10:49:15<17:58:55, 3.63it/s] 37%|███▋ | 136196/371472 [10:49:15<17:54:01, 3.65it/s] 37%|███▋ | 136197/371472 [10:49:15<17:21:32, 3.76it/s] 37%|███▋ | 136198/371472 [10:49:16<17:27:40, 3.74it/s] 37%|███▋ | 136199/371472 [10:49:16<18:03:15, 3.62it/s] 37%|███▋ | 136200/371472 [10:49:16<18:02:08, 3.62it/s] {'loss': 3.1296, 'learning_rate': 6.703225667435196e-07, 'epoch': 5.87} + 37%|███▋ | 136200/371472 [10:49:16<18:02:08, 3.62it/s] 37%|███▋ | 136201/371472 [10:49:16<17:42:20, 3.69it/s] 37%|███▋ | 136202/371472 [10:49:17<18:15:02, 3.58it/s] 37%|███▋ | 136203/371472 [10:49:17<19:01:06, 3.44it/s] 37%|███▋ | 136204/371472 [10:49:17<19:05:51, 3.42it/s] 37%|███▋ | 136205/371472 [10:49:18<17:53:49, 3.65it/s] 37%|███▋ | 136206/371472 [10:49:18<17:52:25, 3.66it/s] 37%|███▋ | 136207/371472 [10:49:18<18:29:18, 3.53it/s] 37%|███▋ | 136208/371472 [10:49:18<18:48:46, 3.47it/s] 37%|███▋ | 136209/371472 [10:49:19<18:29:43, 3.53it/s] 37%|███▋ | 136210/371472 [10:49:19<17:42:47, 3.69it/s] 37%|███▋ | 136211/371472 [10:49:19<18:06:27, 3.61it/s] 37%|███▋ | 136212/371472 [10:49:20<17:19:27, 3.77it/s] 37%|███▋ | 136213/371472 [10:49:20<16:52:54, 3.87it/s] 37%|███▋ | 136214/371472 [10:49:20<17:26:39, 3.75it/s] 37%|███▋ | 136215/371472 [10:49:20<17:48:36, 3.67it/s] 37%|███▋ | 136216/371472 [10:49:21<18:52:55, 3.46it/s] 37%|███▋ | 136217/371472 [10:49:21<19:45:16, 3.31it/s] 37%|███▋ | 136218/371472 [10:49:21<19:17:47, 3.39it/s] 37%|███▋ | 136219/371472 [10:49:22<19:27:40, 3.36it/s] 37%|███▋ | 136220/371472 [10:49:22<19:02:53, 3.43it/s] {'loss': 3.2144, 'learning_rate': 6.702740847680407e-07, 'epoch': 5.87} + 37%|███▋ | 136220/371472 [10:49:22<19:02:53, 3.43it/s] 37%|███▋ | 136221/371472 [10:49:22<19:13:01, 3.40it/s] 37%|███▋ | 136222/371472 [10:49:23<20:27:02, 3.20it/s] 37%|███▋ | 136223/371472 [10:49:23<20:02:06, 3.26it/s] 37%|███▋ | 136224/371472 [10:49:23<19:21:03, 3.38it/s] 37%|███▋ | 136225/371472 [10:49:23<20:39:26, 3.16it/s] 37%|███▋ | 136226/371472 [10:49:24<20:06:26, 3.25it/s] 37%|███▋ | 136227/371472 [10:49:24<19:45:35, 3.31it/s] 37%|███▋ | 136228/371472 [10:49:24<18:40:30, 3.50it/s] 37%|███▋ | 136229/371472 [10:49:25<18:30:05, 3.53it/s] 37%|███▋ | 136230/371472 [10:49:25<19:45:59, 3.31it/s] 37%|███▋ | 136231/371472 [10:49:25<19:17:00, 3.39it/s] 37%|███▋ | 136232/371472 [10:49:26<20:12:49, 3.23it/s] 37%|███▋ | 136233/371472 [10:49:26<18:50:36, 3.47it/s] 37%|███▋ | 136234/371472 [10:49:26<17:54:44, 3.65it/s] 37%|███▋ | 136235/371472 [10:49:26<17:16:28, 3.78it/s] 37%|███▋ | 136236/371472 [10:49:26<17:20:42, 3.77it/s] 37%|███▋ | 136237/371472 [10:49:27<17:07:59, 3.81it/s] 37%|███▋ | 136238/371472 [10:49:27<16:50:45, 3.88it/s] 37%|███▋ | 136239/371472 [10:49:27<17:10:49, 3.80it/s] 37%|███▋ | 136240/371472 [10:49:28<16:54:58, 3.86it/s] {'loss': 3.0968, 'learning_rate': 6.702256027925617e-07, 'epoch': 5.87} + 37%|███▋ | 136240/371472 [10:49:28<16:54:58, 3.86it/s] 37%|███▋ | 136241/371472 [10:49:28<19:13:56, 3.40it/s] 37%|███▋ | 136242/371472 [10:49:28<18:46:17, 3.48it/s] 37%|███▋ | 136243/371472 [10:49:28<18:47:08, 3.48it/s] 37%|███▋ | 136244/371472 [10:49:29<19:00:13, 3.44it/s] 37%|███▋ | 136245/371472 [10:49:29<19:22:20, 3.37it/s] 37%|███▋ | 136246/371472 [10:49:29<18:45:49, 3.48it/s] 37%|███▋ | 136247/371472 [10:49:30<21:12:47, 3.08it/s] 37%|███▋ | 136248/371472 [10:49:30<21:36:31, 3.02it/s] 37%|███▋ | 136249/371472 [10:49:30<19:56:50, 3.28it/s] 37%|███▋ | 136250/371472 [10:49:31<19:27:20, 3.36it/s] 37%|███▋ | 136251/371472 [10:49:31<18:29:16, 3.53it/s] 37%|███▋ | 136252/371472 [10:49:31<19:03:45, 3.43it/s] 37%|███▋ | 136253/371472 [10:49:31<18:57:42, 3.45it/s] 37%|███▋ | 136254/371472 [10:49:32<18:30:37, 3.53it/s] 37%|███▋ | 136255/371472 [10:49:32<18:01:04, 3.63it/s] 37%|███▋ | 136256/371472 [10:49:32<17:38:34, 3.70it/s] 37%|███▋ | 136257/371472 [10:49:33<17:30:45, 3.73it/s] 37%|███▋ | 136258/371472 [10:49:33<17:30:19, 3.73it/s] 37%|███▋ | 136259/371472 [10:49:33<17:50:16, 3.66it/s] 37%|███▋ | 136260/371472 [10:49:33<19:08:10, 3.41it/s] {'loss': 3.3572, 'learning_rate': 6.701771208170829e-07, 'epoch': 5.87} + 37%|███▋ | 136260/371472 [10:49:33<19:08:10, 3.41it/s] 37%|███▋ | 136261/371472 [10:49:34<18:09:54, 3.60it/s] 37%|███▋ | 136262/371472 [10:49:34<17:40:50, 3.70it/s] 37%|███▋ | 136263/371472 [10:49:34<17:27:31, 3.74it/s] 37%|███▋ | 136264/371472 [10:49:34<17:12:53, 3.80it/s] 37%|███▋ | 136265/371472 [10:49:35<18:38:30, 3.50it/s] 37%|███▋ | 136266/371472 [10:49:35<17:55:06, 3.65it/s] 37%|███▋ | 136267/371472 [10:49:35<17:47:43, 3.67it/s] 37%|███▋ | 136268/371472 [10:49:36<17:13:34, 3.79it/s] 37%|███▋ | 136269/371472 [10:49:36<17:19:30, 3.77it/s] 37%|███▋ | 136270/371472 [10:49:36<17:49:22, 3.67it/s] 37%|███▋ | 136271/371472 [10:49:36<17:52:31, 3.65it/s] 37%|███▋ | 136272/371472 [10:49:37<17:58:35, 3.63it/s] 37%|███▋ | 136273/371472 [10:49:37<17:58:56, 3.63it/s] 37%|███▋ | 136274/371472 [10:49:37<17:34:39, 3.72it/s] 37%|███▋ | 136275/371472 [10:49:37<17:59:13, 3.63it/s] 37%|███▋ | 136276/371472 [10:49:38<17:40:02, 3.70it/s] 37%|███▋ | 136277/371472 [10:49:38<18:16:39, 3.57it/s] 37%|███▋ | 136278/371472 [10:49:38<19:42:40, 3.31it/s] 37%|███▋ | 136279/371472 [10:49:39<19:02:18, 3.43it/s] 37%|███▋ | 136280/371472 [10:49:39<18:20:10, 3.56it/s] {'loss': 3.1393, 'learning_rate': 6.70128638841604e-07, 'epoch': 5.87} + 37%|███▋ | 136280/371472 [10:49:39<18:20:10, 3.56it/s] 37%|███▋ | 136281/371472 [10:49:39<18:37:44, 3.51it/s] 37%|███▋ | 136282/371472 [10:49:39<17:35:56, 3.71it/s] 37%|███▋ | 136283/371472 [10:49:40<17:18:41, 3.77it/s] 37%|███▋ | 136284/371472 [10:49:40<18:11:56, 3.59it/s] 37%|███▋ | 136285/371472 [10:49:40<18:21:36, 3.56it/s] 37%|███▋ | 136286/371472 [10:49:41<18:07:50, 3.60it/s] 37%|███▋ | 136287/371472 [10:49:41<17:59:27, 3.63it/s] 37%|███▋ | 136288/371472 [10:49:41<18:36:10, 3.51it/s] 37%|███▋ | 136289/371472 [10:49:41<18:10:17, 3.60it/s] 37%|███▋ | 136290/371472 [10:49:42<17:59:55, 3.63it/s] 37%|███▋ | 136291/371472 [10:49:42<18:23:29, 3.55it/s] 37%|███▋ | 136292/371472 [10:49:42<18:02:58, 3.62it/s] 37%|███▋ | 136293/371472 [10:49:42<17:38:00, 3.70it/s] 37%|███▋ | 136294/371472 [10:49:43<17:56:57, 3.64it/s] 37%|███▋ | 136295/371472 [10:49:43<18:24:52, 3.55it/s] 37%|███▋ | 136296/371472 [10:49:43<18:06:03, 3.61it/s] 37%|███▋ | 136297/371472 [10:49:44<19:46:06, 3.30it/s] 37%|███▋ | 136298/371472 [10:49:44<19:19:07, 3.38it/s] 37%|███▋ | 136299/371472 [10:49:44<19:07:05, 3.42it/s] 37%|███▋ | 136300/371472 [10:49:44<18:24:48, 3.55it/s] {'loss': 3.1237, 'learning_rate': 6.70080156866125e-07, 'epoch': 5.87} + 37%|███▋ | 136300/371472 [10:49:44<18:24:48, 3.55it/s] 37%|███▋ | 136301/371472 [10:49:45<17:51:50, 3.66it/s] 37%|███▋ | 136302/371472 [10:49:45<17:48:16, 3.67it/s] 37%|███▋ | 136303/371472 [10:49:45<17:57:15, 3.64it/s] 37%|███▋ | 136304/371472 [10:49:46<18:33:59, 3.52it/s] 37%|███▋ | 136305/371472 [10:49:46<18:57:13, 3.45it/s] 37%|███▋ | 136306/371472 [10:49:46<20:10:24, 3.24it/s] 37%|███▋ | 136307/371472 [10:49:47<19:46:17, 3.30it/s] 37%|███▋ | 136308/371472 [10:49:47<19:17:41, 3.39it/s] 37%|███▋ | 136309/371472 [10:49:47<19:05:26, 3.42it/s] 37%|███▋ | 136310/371472 [10:49:47<19:46:39, 3.30it/s] 37%|███▋ | 136311/371472 [10:49:48<19:04:42, 3.42it/s] 37%|███▋ | 136312/371472 [10:49:48<18:25:40, 3.54it/s] 37%|███▋ | 136313/371472 [10:49:48<17:57:20, 3.64it/s] 37%|███▋ | 136314/371472 [10:49:49<19:01:44, 3.43it/s] 37%|███▋ | 136315/371472 [10:49:49<19:17:03, 3.39it/s] 37%|███▋ | 136316/371472 [10:49:49<18:38:45, 3.50it/s] 37%|███▋ | 136317/371472 [10:49:49<18:00:04, 3.63it/s] 37%|███▋ | 136318/371472 [10:49:50<18:22:21, 3.56it/s] 37%|███▋ | 136319/371472 [10:49:50<18:07:21, 3.60it/s] 37%|███▋ | 136320/371472 [10:49:50<18:11:36, 3.59it/s] {'loss': 3.0902, 'learning_rate': 6.700316748906461e-07, 'epoch': 5.87} + 37%|███▋ | 136320/371472 [10:49:50<18:11:36, 3.59it/s] 37%|███▋ | 136321/371472 [10:49:50<18:00:32, 3.63it/s] 37%|███▋ | 136322/371472 [10:49:51<18:30:42, 3.53it/s] 37%|███▋ | 136323/371472 [10:49:51<20:01:42, 3.26it/s] 37%|███▋ | 136324/371472 [10:49:51<19:37:20, 3.33it/s] 37%|███▋ | 136325/371472 [10:49:52<18:42:13, 3.49it/s] 37%|███▋ | 136326/371472 [10:49:52<18:59:44, 3.44it/s] 37%|███▋ | 136327/371472 [10:49:52<19:44:49, 3.31it/s] 37%|███▋ | 136328/371472 [10:49:53<19:06:41, 3.42it/s] 37%|███▋ | 136329/371472 [10:49:53<18:48:13, 3.47it/s] 37%|███▋ | 136330/371472 [10:49:53<19:08:35, 3.41it/s] 37%|███▋ | 136331/371472 [10:49:53<18:40:42, 3.50it/s] 37%|███▋ | 136332/371472 [10:49:54<18:17:38, 3.57it/s] 37%|███▋ | 136333/371472 [10:49:54<18:09:16, 3.60it/s] 37%|███▋ | 136334/371472 [10:49:54<17:34:13, 3.72it/s] 37%|███▋ | 136335/371472 [10:49:54<17:11:48, 3.80it/s] 37%|███▋ | 136336/371472 [10:49:55<19:44:21, 3.31it/s] 37%|███▋ | 136337/371472 [10:49:55<18:24:38, 3.55it/s] 37%|███▋ | 136338/371472 [10:49:55<18:10:43, 3.59it/s] 37%|███▋ | 136339/371472 [10:49:56<17:26:58, 3.74it/s] 37%|███▋ | 136340/371472 [10:49:56<17:58:56, 3.63it/s] {'loss': 3.0515, 'learning_rate': 6.699831929151673e-07, 'epoch': 5.87} + 37%|███▋ | 136340/371472 [10:49:56<17:58:56, 3.63it/s] 37%|███▋ | 136341/371472 [10:49:56<17:36:59, 3.71it/s] 37%|███▋ | 136342/371472 [10:49:56<17:31:25, 3.73it/s] 37%|███▋ | 136343/371472 [10:49:57<19:06:48, 3.42it/s] 37%|███▋ | 136344/371472 [10:49:57<19:31:10, 3.35it/s] 37%|███▋ | 136345/371472 [10:49:57<19:23:02, 3.37it/s] 37%|███▋ | 136346/371472 [10:49:58<18:36:15, 3.51it/s] 37%|███▋ | 136347/371472 [10:49:58<18:41:46, 3.49it/s] 37%|███▋ | 136348/371472 [10:49:58<18:09:07, 3.60it/s] 37%|███▋ | 136349/371472 [10:49:58<18:40:23, 3.50it/s] 37%|███▋ | 136350/371472 [10:49:59<18:18:24, 3.57it/s] 37%|███▋ | 136351/371472 [10:49:59<17:19:38, 3.77it/s] 37%|███▋ | 136352/371472 [10:49:59<18:04:17, 3.61it/s] 37%|███▋ | 136353/371472 [10:50:00<17:08:45, 3.81it/s] 37%|███▋ | 136354/371472 [10:50:00<16:57:02, 3.85it/s] 37%|███▋ | 136355/371472 [10:50:00<17:00:02, 3.84it/s] 37%|███▋ | 136356/371472 [10:50:00<19:58:47, 3.27it/s] 37%|███▋ | 136357/371472 [10:50:01<19:10:22, 3.41it/s] 37%|███▋ | 136358/371472 [10:50:01<18:18:46, 3.57it/s] 37%|███▋ | 136359/371472 [10:50:01<18:00:28, 3.63it/s] 37%|███▋ | 136360/371472 [10:50:01<17:57:01, 3.64it/s] {'loss': 3.0444, 'learning_rate': 6.699347109396885e-07, 'epoch': 5.87} + 37%|███▋ | 136360/371472 [10:50:01<17:57:01, 3.64it/s] 37%|███▋ | 136361/371472 [10:50:02<17:37:39, 3.70it/s] 37%|███▋ | 136362/371472 [10:50:02<17:56:26, 3.64it/s] 37%|███▋ | 136363/371472 [10:50:02<18:54:56, 3.45it/s] 37%|███▋ | 136364/371472 [10:50:03<19:42:26, 3.31it/s] 37%|███▋ | 136365/371472 [10:50:03<19:15:10, 3.39it/s] 37%|███▋ | 136366/371472 [10:50:03<19:04:53, 3.42it/s] 37%|███▋ | 136367/371472 [10:50:04<18:49:47, 3.47it/s] 37%|███▋ | 136368/371472 [10:50:04<18:31:20, 3.53it/s] 37%|███▋ | 136369/371472 [10:50:04<19:44:25, 3.31it/s] 37%|███▋ | 136370/371472 [10:50:04<18:33:03, 3.52it/s] 37%|███▋ | 136371/371472 [10:50:05<18:49:26, 3.47it/s] 37%|███▋ | 136372/371472 [10:50:05<18:38:48, 3.50it/s] 37%|███▋ | 136373/371472 [10:50:05<18:27:07, 3.54it/s] 37%|███▋ | 136374/371472 [10:50:05<17:42:47, 3.69it/s] 37%|███▋ | 136375/371472 [10:50:06<18:16:07, 3.57it/s] 37%|███▋ | 136376/371472 [10:50:06<18:06:58, 3.60it/s] 37%|███▋ | 136377/371472 [10:50:06<18:55:13, 3.45it/s] 37%|███▋ | 136378/371472 [10:50:07<18:40:07, 3.50it/s] 37%|███▋ | 136379/371472 [10:50:07<19:52:44, 3.29it/s] 37%|███▋ | 136380/371472 [10:50:07<19:46:23, 3.30it/s] {'loss': 3.2028, 'learning_rate': 6.698862289642095e-07, 'epoch': 5.87} + 37%|███▋ | 136380/371472 [10:50:07<19:46:23, 3.30it/s] 37%|███▋ | 136381/371472 [10:50:08<18:30:22, 3.53it/s] 37%|███▋ | 136382/371472 [10:50:08<18:20:05, 3.56it/s] 37%|███▋ | 136383/371472 [10:50:08<18:01:25, 3.62it/s] 37%|███▋ | 136384/371472 [10:50:08<18:58:33, 3.44it/s] 37%|███▋ | 136385/371472 [10:50:09<18:29:45, 3.53it/s] 37%|███▋ | 136386/371472 [10:50:09<18:16:16, 3.57it/s] 37%|███▋ | 136387/371472 [10:50:09<17:49:45, 3.66it/s] 37%|███▋ | 136388/371472 [10:50:09<17:32:18, 3.72it/s] 37%|███▋ | 136389/371472 [10:50:10<17:44:48, 3.68it/s] 37%|███▋ | 136390/371472 [10:50:10<18:35:03, 3.51it/s] 37%|███▋ | 136391/371472 [10:50:10<18:06:27, 3.61it/s] 37%|███▋ | 136392/371472 [10:50:11<19:59:39, 3.27it/s] 37%|███▋ | 136393/371472 [10:50:11<19:28:30, 3.35it/s] 37%|███▋ | 136394/371472 [10:50:11<20:23:40, 3.20it/s] 37%|███▋ | 136395/371472 [10:50:12<20:03:49, 3.25it/s] 37%|███▋ | 136396/371472 [10:50:12<18:53:56, 3.46it/s] 37%|███▋ | 136397/371472 [10:50:12<18:40:43, 3.50it/s] 37%|███▋ | 136398/371472 [10:50:12<19:10:59, 3.40it/s] 37%|███▋ | 136399/371472 [10:50:13<18:47:54, 3.47it/s] 37%|███▋ | 136400/371472 [10:50:13<18:53:31, 3.46it/s] {'loss': 3.1298, 'learning_rate': 6.698377469887306e-07, 'epoch': 5.88} + 37%|███▋ | 136400/371472 [10:50:13<18:53:31, 3.46it/s] 37%|███▋ | 136401/371472 [10:50:13<18:13:33, 3.58it/s] 37%|███▋ | 136402/371472 [10:50:14<18:17:47, 3.57it/s] 37%|███▋ | 136403/371472 [10:50:14<18:44:49, 3.48it/s] 37%|███▋ | 136404/371472 [10:50:14<18:23:14, 3.55it/s] 37%|███▋ | 136405/371472 [10:50:14<17:39:19, 3.70it/s] 37%|███▋ | 136406/371472 [10:50:15<17:07:44, 3.81it/s] 37%|███▋ | 136407/371472 [10:50:15<16:41:44, 3.91it/s] 37%|███▋ | 136408/371472 [10:50:15<16:47:15, 3.89it/s] 37%|███▋ | 136409/371472 [10:50:15<17:35:17, 3.71it/s] 37%|███▋ | 136410/371472 [10:50:16<18:13:09, 3.58it/s] 37%|███▋ | 136411/371472 [10:50:16<17:45:12, 3.68it/s] 37%|███▋ | 136412/371472 [10:50:16<18:56:11, 3.45it/s] 37%|███▋ | 136413/371472 [10:50:17<19:37:21, 3.33it/s] 37%|███▋ | 136414/371472 [10:50:17<19:18:37, 3.38it/s] 37%|███▋ | 136415/371472 [10:50:17<18:27:42, 3.54it/s] 37%|███▋ | 136416/371472 [10:50:17<18:12:30, 3.59it/s] 37%|███▋ | 136417/371472 [10:50:18<18:13:40, 3.58it/s] 37%|███▋ | 136418/371472 [10:50:18<17:53:30, 3.65it/s] 37%|███▋ | 136419/371472 [10:50:18<19:13:08, 3.40it/s] 37%|███▋ | 136420/371472 [10:50:19<18:29:14, 3.53it/s] {'loss': 3.0716, 'learning_rate': 6.697892650132518e-07, 'epoch': 5.88} + 37%|███▋ | 136420/371472 [10:50:19<18:29:14, 3.53it/s] 37%|███▋ | 136421/371472 [10:50:19<20:48:10, 3.14it/s] 37%|███▋ | 136422/371472 [10:50:19<19:32:58, 3.34it/s] 37%|███▋ | 136423/371472 [10:50:19<18:46:29, 3.48it/s] 37%|███▋ | 136424/371472 [10:50:20<19:52:00, 3.29it/s] 37%|███▋ | 136425/371472 [10:50:20<18:37:35, 3.51it/s] 37%|███▋ | 136426/371472 [10:50:20<18:48:04, 3.47it/s] 37%|███▋ | 136427/371472 [10:50:21<17:39:56, 3.70it/s] 37%|███▋ | 136428/371472 [10:50:21<17:35:43, 3.71it/s] 37%|███▋ | 136429/371472 [10:50:21<16:48:40, 3.88it/s] 37%|███▋ | 136430/371472 [10:50:21<16:51:12, 3.87it/s] 37%|███▋ | 136431/371472 [10:50:22<17:14:38, 3.79it/s] 37%|███▋ | 136432/371472 [10:50:22<17:57:08, 3.64it/s] 37%|███▋ | 136433/371472 [10:50:22<17:48:20, 3.67it/s] 37%|███▋ | 136434/371472 [10:50:22<17:44:56, 3.68it/s] 37%|███▋ | 136435/371472 [10:50:23<17:12:04, 3.80it/s] 37%|███▋ | 136436/371472 [10:50:23<16:51:06, 3.87it/s] 37%|███▋ | 136437/371472 [10:50:23<16:40:42, 3.91it/s] 37%|███▋ | 136438/371472 [10:50:23<16:23:14, 3.98it/s] 37%|███▋ | 136439/371472 [10:50:24<16:27:58, 3.96it/s] 37%|███▋ | 136440/371472 [10:50:24<17:13:22, 3.79it/s] {'loss': 3.0892, 'learning_rate': 6.697407830377728e-07, 'epoch': 5.88} + 37%|███▋ | 136440/371472 [10:50:24<17:13:22, 3.79it/s] 37%|███▋ | 136441/371472 [10:50:24<17:09:12, 3.81it/s] 37%|███▋ | 136442/371472 [10:50:25<17:25:21, 3.75it/s] 37%|███▋ | 136443/371472 [10:50:25<18:00:25, 3.63it/s] 37%|███▋ | 136444/371472 [10:50:25<17:52:17, 3.65it/s] 37%|███▋ | 136445/371472 [10:50:25<17:12:00, 3.80it/s] 37%|███▋ | 136446/371472 [10:50:26<17:54:43, 3.64it/s] 37%|███▋ | 136447/371472 [10:50:26<18:08:36, 3.60it/s] 37%|███▋ | 136448/371472 [10:50:26<18:58:22, 3.44it/s] 37%|███▋ | 136449/371472 [10:50:27<18:49:32, 3.47it/s] 37%|███▋ | 136450/371472 [10:50:27<18:06:34, 3.60it/s] 37%|███▋ | 136451/371472 [10:50:27<19:00:39, 3.43it/s] 37%|███▋ | 136452/371472 [10:50:27<18:14:38, 3.58it/s] 37%|███▋ | 136453/371472 [10:50:28<18:38:31, 3.50it/s] 37%|███▋ | 136454/371472 [10:50:28<18:31:43, 3.52it/s] 37%|███▋ | 136455/371472 [10:50:28<17:57:56, 3.63it/s] 37%|███▋ | 136456/371472 [10:50:28<17:46:19, 3.67it/s] 37%|███▋ | 136457/371472 [10:50:29<18:13:03, 3.58it/s] 37%|███▋ | 136458/371472 [10:50:29<17:40:20, 3.69it/s] 37%|███▋ | 136459/371472 [10:50:29<18:21:36, 3.56it/s] 37%|███▋ | 136460/371472 [10:50:30<18:24:52, 3.55it/s] {'loss': 3.0835, 'learning_rate': 6.696923010622939e-07, 'epoch': 5.88} + 37%|███▋ | 136460/371472 [10:50:30<18:24:52, 3.55it/s] 37%|███▋ | 136461/371472 [10:50:30<17:51:31, 3.66it/s] 37%|███▋ | 136462/371472 [10:50:30<17:39:26, 3.70it/s] 37%|███▋ | 136463/371472 [10:50:30<18:38:47, 3.50it/s] 37%|███▋ | 136464/371472 [10:50:31<18:19:08, 3.56it/s] 37%|███▋ | 136465/371472 [10:50:31<20:19:29, 3.21it/s] 37%|███▋ | 136466/371472 [10:50:31<19:56:46, 3.27it/s] 37%|███▋ | 136467/371472 [10:50:32<19:33:50, 3.34it/s] 37%|███▋ | 136468/371472 [10:50:32<18:46:15, 3.48it/s] 37%|███▋ | 136469/371472 [10:50:32<19:05:28, 3.42it/s] 37%|███▋ | 136470/371472 [10:50:32<18:01:28, 3.62it/s] 37%|███▋ | 136471/371472 [10:50:33<17:44:22, 3.68it/s] 37%|███▋ | 136472/371472 [10:50:33<18:37:40, 3.50it/s] 37%|███▋ | 136473/371472 [10:50:33<18:10:58, 3.59it/s] 37%|███▋ | 136474/371472 [10:50:34<17:40:32, 3.69it/s] 37%|███▋ | 136475/371472 [10:50:34<17:46:20, 3.67it/s] 37%|███▋ | 136476/371472 [10:50:34<17:35:08, 3.71it/s] 37%|███▋ | 136477/371472 [10:50:34<17:42:36, 3.69it/s] 37%|███▋ | 136478/371472 [10:50:35<17:58:35, 3.63it/s] 37%|███▋ | 136479/371472 [10:50:35<17:23:15, 3.75it/s] 37%|███▋ | 136480/371472 [10:50:35<17:27:17, 3.74it/s] {'loss': 2.9605, 'learning_rate': 6.69643819086815e-07, 'epoch': 5.88} + 37%|███▋ | 136480/371472 [10:50:35<17:27:17, 3.74it/s] 37%|███▋ | 136481/371472 [10:50:35<17:20:34, 3.76it/s] 37%|███▋ | 136482/371472 [10:50:36<18:09:30, 3.59it/s] 37%|███▋ | 136483/371472 [10:50:36<18:07:10, 3.60it/s] 37%|███▋ | 136484/371472 [10:50:36<17:59:54, 3.63it/s] 37%|███▋ | 136485/371472 [10:50:37<18:09:02, 3.60it/s] 37%|███▋ | 136486/371472 [10:50:37<17:31:25, 3.72it/s] 37%|███▋ | 136487/371472 [10:50:37<19:18:25, 3.38it/s] 37%|███▋ | 136488/371472 [10:50:37<18:36:29, 3.51it/s] 37%|███▋ | 136489/371472 [10:50:38<19:07:51, 3.41it/s] 37%|███▋ | 136490/371472 [10:50:38<19:10:32, 3.40it/s] 37%|███▋ | 136491/371472 [10:50:38<19:16:14, 3.39it/s] 37%|███▋ | 136492/371472 [10:50:39<19:36:24, 3.33it/s] 37%|███▋ | 136493/371472 [10:50:39<18:38:55, 3.50it/s] 37%|███▋ | 136494/371472 [10:50:39<20:18:04, 3.22it/s] 37%|███▋ | 136495/371472 [10:50:40<19:26:49, 3.36it/s] 37%|███▋ | 136496/371472 [10:50:40<18:56:30, 3.45it/s] 37%|███▋ | 136497/371472 [10:50:40<18:54:05, 3.45it/s] 37%|███▋ | 136498/371472 [10:50:40<18:59:27, 3.44it/s] 37%|███▋ | 136499/371472 [10:50:41<18:22:40, 3.55it/s] 37%|███▋ | 136500/371472 [10:50:41<20:05:18, 3.25it/s] {'loss': 3.0078, 'learning_rate': 6.695953371113362e-07, 'epoch': 5.88} + 37%|███▋ | 136500/371472 [10:50:41<20:05:18, 3.25it/s] 37%|███▋ | 136501/371472 [10:50:41<20:02:33, 3.26it/s] 37%|███▋ | 136502/371472 [10:50:42<20:39:13, 3.16it/s] 37%|███▋ | 136503/371472 [10:50:42<21:05:52, 3.09it/s] 37%|███▋ | 136504/371472 [10:50:42<21:17:57, 3.06it/s] 37%|███▋ | 136505/371472 [10:50:43<20:55:06, 3.12it/s] 37%|███▋ | 136506/371472 [10:50:43<20:16:13, 3.22it/s] 37%|███▋ | 136507/371472 [10:50:43<20:10:19, 3.24it/s] 37%|███▋ | 136508/371472 [10:50:43<19:05:24, 3.42it/s] 37%|███▋ | 136509/371472 [10:50:44<19:43:12, 3.31it/s] 37%|███▋ | 136510/371472 [10:50:44<19:51:34, 3.29it/s] 37%|███▋ | 136511/371472 [10:50:44<19:19:11, 3.38it/s] 37%|███▋ | 136512/371472 [10:50:45<19:01:13, 3.43it/s] 37%|███▋ | 136513/371472 [10:50:45<18:00:31, 3.62it/s] 37%|███▋ | 136514/371472 [10:50:45<19:11:36, 3.40it/s] 37%|███▋ | 136515/371472 [10:50:46<18:39:29, 3.50it/s] 37%|███▋ | 136516/371472 [10:50:46<18:03:54, 3.61it/s] 37%|███▋ | 136517/371472 [10:50:46<18:22:54, 3.55it/s] 37%|███▋ | 136518/371472 [10:50:46<18:48:31, 3.47it/s] 37%|███▋ | 136519/371472 [10:50:47<18:06:31, 3.60it/s] 37%|███▋ | 136520/371472 [10:50:47<17:17:25, 3.77it/s] {'loss': 3.0952, 'learning_rate': 6.695468551358572e-07, 'epoch': 5.88} + 37%|███▋ | 136520/371472 [10:50:47<17:17:25, 3.77it/s] 37%|███▋ | 136521/371472 [10:50:47<19:20:42, 3.37it/s] 37%|███▋ | 136522/371472 [10:50:47<18:37:07, 3.51it/s] 37%|███▋ | 136523/371472 [10:50:48<18:35:05, 3.51it/s] 37%|███▋ | 136524/371472 [10:50:48<17:51:00, 3.66it/s] 37%|███▋ | 136525/371472 [10:50:48<17:53:15, 3.65it/s] 37%|███▋ | 136526/371472 [10:50:49<18:49:37, 3.47it/s] 37%|███▋ | 136527/371472 [10:50:49<18:07:01, 3.60it/s] 37%|███▋ | 136528/371472 [10:50:49<18:12:37, 3.58it/s] 37%|███▋ | 136529/371472 [10:50:49<18:03:17, 3.61it/s] 37%|███▋ | 136530/371472 [10:50:50<17:13:19, 3.79it/s] 37%|███▋ | 136531/371472 [10:50:50<17:03:44, 3.82it/s] 37%|███▋ | 136532/371472 [10:50:50<17:12:54, 3.79it/s] 37%|███▋ | 136533/371472 [10:50:50<16:55:31, 3.86it/s] 37%|███▋ | 136534/371472 [10:50:51<16:51:30, 3.87it/s] 37%|███▋ | 136535/371472 [10:50:51<18:01:48, 3.62it/s] 37%|███▋ | 136536/371472 [10:50:51<17:51:25, 3.65it/s] 37%|███▋ | 136537/371472 [10:50:52<19:01:18, 3.43it/s] 37%|███▋ | 136538/371472 [10:50:52<18:40:20, 3.49it/s] 37%|███▋ | 136539/371472 [10:50:52<19:34:56, 3.33it/s] 37%|███▋ | 136540/371472 [10:50:53<20:03:44, 3.25it/s] {'loss': 3.1124, 'learning_rate': 6.694983731603783e-07, 'epoch': 5.88} + 37%|███▋ | 136540/371472 [10:50:53<20:03:44, 3.25it/s] 37%|███▋ | 136541/371472 [10:50:53<19:01:58, 3.43it/s] 37%|███▋ | 136542/371472 [10:50:53<18:19:09, 3.56it/s] 37%|███▋ | 136543/371472 [10:50:53<17:52:45, 3.65it/s] 37%|███▋ | 136544/371472 [10:50:54<17:46:40, 3.67it/s] 37%|███▋ | 136545/371472 [10:50:54<18:07:44, 3.60it/s] 37%|███▋ | 136546/371472 [10:50:54<18:19:44, 3.56it/s] 37%|███▋ | 136547/371472 [10:50:54<17:45:39, 3.67it/s] 37%|███▋ | 136548/371472 [10:50:55<17:54:58, 3.64it/s] 37%|███▋ | 136549/371472 [10:50:55<18:00:14, 3.62it/s] 37%|███▋ | 136550/371472 [10:50:55<17:48:01, 3.67it/s] 37%|███▋ | 136551/371472 [10:50:56<18:04:39, 3.61it/s] 37%|███▋ | 136552/371472 [10:50:56<17:28:38, 3.73it/s] 37%|███▋ | 136553/371472 [10:50:56<19:08:16, 3.41it/s] 37%|███▋ | 136554/371472 [10:50:56<18:42:58, 3.49it/s] 37%|███▋ | 136555/371472 [10:50:57<18:10:00, 3.59it/s] 37%|███��� | 136556/371472 [10:50:57<18:18:11, 3.57it/s] 37%|███▋ | 136557/371472 [10:50:57<18:58:01, 3.44it/s] 37%|███▋ | 136558/371472 [10:50:57<18:08:23, 3.60it/s] 37%|███▋ | 136559/371472 [10:50:58<18:09:52, 3.59it/s] 37%|███▋ | 136560/371472 [10:50:58<17:50:33, 3.66it/s] {'loss': 3.2002, 'learning_rate': 6.694498911848994e-07, 'epoch': 5.88} + 37%|███▋ | 136560/371472 [10:50:58<17:50:33, 3.66it/s] 37%|███▋ | 136561/371472 [10:50:58<18:09:00, 3.60it/s] 37%|███▋ | 136562/371472 [10:50:59<17:23:54, 3.75it/s] 37%|███▋ | 136563/371472 [10:50:59<18:24:28, 3.54it/s] 37%|███▋ | 136564/371472 [10:50:59<18:17:35, 3.57it/s] 37%|███▋ | 136565/371472 [10:50:59<18:46:15, 3.48it/s] 37%|███▋ | 136566/371472 [10:51:00<19:14:37, 3.39it/s] 37%|███▋ | 136567/371472 [10:51:00<19:08:45, 3.41it/s] 37%|███▋ | 136568/371472 [10:51:00<19:34:51, 3.33it/s] 37%|███▋ | 136569/371472 [10:51:01<19:39:13, 3.32it/s] 37%|███▋ | 136570/371472 [10:51:01<19:12:42, 3.40it/s] 37%|███▋ | 136571/371472 [10:51:01<20:01:43, 3.26it/s] 37%|███▋ | 136572/371472 [10:51:02<19:32:22, 3.34it/s] 37%|███▋ | 136573/371472 [10:51:02<18:36:49, 3.51it/s] 37%|███▋ | 136574/371472 [10:51:02<17:54:59, 3.64it/s] 37%|███▋ | 136575/371472 [10:51:02<20:10:29, 3.23it/s] 37%|███▋ | 136576/371472 [10:51:03<20:26:17, 3.19it/s] 37%|███▋ | 136577/371472 [10:51:03<19:33:56, 3.33it/s] 37%|███▋ | 136578/371472 [10:51:03<18:35:40, 3.51it/s] 37%|███▋ | 136579/371472 [10:51:04<18:16:04, 3.57it/s] 37%|███▋ | 136580/371472 [10:51:04<19:03:31, 3.42it/s] {'loss': 3.154, 'learning_rate': 6.694014092094206e-07, 'epoch': 5.88} + 37%|███▋ | 136580/371472 [10:51:04<19:03:31, 3.42it/s] 37%|███▋ | 136581/371472 [10:51:04<19:18:54, 3.38it/s] 37%|███▋ | 136582/371472 [10:51:04<18:23:04, 3.55it/s] 37%|███▋ | 136583/371472 [10:51:05<17:33:09, 3.72it/s] 37%|███▋ | 136584/371472 [10:51:05<17:55:45, 3.64it/s] 37%|███▋ | 136585/371472 [10:51:05<17:14:34, 3.78it/s] 37%|███▋ | 136586/371472 [10:51:05<17:11:29, 3.80it/s] 37%|███▋ | 136587/371472 [10:51:06<16:38:29, 3.92it/s] 37%|███▋ | 136588/371472 [10:51:06<16:37:09, 3.93it/s] 37%|███▋ | 136589/371472 [10:51:06<18:24:42, 3.54it/s] 37%|███▋ | 136590/371472 [10:51:07<19:03:51, 3.42it/s] 37%|███▋ | 136591/371472 [10:51:07<19:01:14, 3.43it/s] 37%|███▋ | 136592/371472 [10:51:07<19:08:57, 3.41it/s] 37%|███▋ | 136593/371472 [10:51:08<19:29:12, 3.35it/s] 37%|███▋ | 136594/371472 [10:51:08<19:31:47, 3.34it/s] 37%|███▋ | 136595/371472 [10:51:08<19:09:11, 3.41it/s] 37%|███▋ | 136596/371472 [10:51:08<19:06:40, 3.41it/s] 37%|███▋ | 136597/371472 [10:51:09<18:39:29, 3.50it/s] 37%|███▋ | 136598/371472 [10:51:09<18:15:50, 3.57it/s] 37%|███▋ | 136599/371472 [10:51:09<17:39:08, 3.70it/s] 37%|███▋ | 136600/371472 [10:51:10<18:49:00, 3.47it/s] {'loss': 3.1006, 'learning_rate': 6.693529272339417e-07, 'epoch': 5.88} + 37%|███▋ | 136600/371472 [10:51:10<18:49:00, 3.47it/s] 37%|███▋ | 136601/371472 [10:51:10<18:29:29, 3.53it/s] 37%|███▋ | 136602/371472 [10:51:10<18:08:20, 3.60it/s] 37%|███▋ | 136603/371472 [10:51:10<18:59:21, 3.44it/s] 37%|███▋ | 136604/371472 [10:51:11<18:42:17, 3.49it/s] 37%|███▋ | 136605/371472 [10:51:11<18:07:58, 3.60it/s] 37%|███▋ | 136606/371472 [10:51:11<17:34:16, 3.71it/s] 37%|███▋ | 136607/371472 [10:51:11<17:25:15, 3.74it/s] 37%|███▋ | 136608/371472 [10:51:12<17:00:17, 3.84it/s] 37%|███▋ | 136609/371472 [10:51:12<17:06:55, 3.81it/s] 37%|███▋ | 136610/371472 [10:51:12<17:32:59, 3.72it/s] 37%|███▋ | 136611/371472 [10:51:13<17:44:49, 3.68it/s] 37%|███▋ | 136612/371472 [10:51:13<17:01:36, 3.83it/s] 37%|███▋ | 136613/371472 [10:51:13<17:57:57, 3.63it/s] 37%|███▋ | 136614/371472 [10:51:13<19:18:01, 3.38it/s] 37%|███▋ | 136615/371472 [10:51:14<19:17:27, 3.38it/s] 37%|███▋ | 136616/371472 [10:51:14<19:06:14, 3.41it/s] 37%|███▋ | 136617/371472 [10:51:14<18:22:54, 3.55it/s] 37%|███▋ | 136618/371472 [10:51:14<18:01:47, 3.62it/s] 37%|███▋ | 136619/371472 [10:51:15<17:29:13, 3.73it/s] 37%|███▋ | 136620/371472 [10:51:15<17:31:30, 3.72it/s] {'loss': 3.1552, 'learning_rate': 6.693044452584628e-07, 'epoch': 5.88} + 37%|███▋ | 136620/371472 [10:51:15<17:31:30, 3.72it/s] 37%|███▋ | 136621/371472 [10:51:15<17:17:52, 3.77it/s] 37%|███▋ | 136622/371472 [10:51:16<17:17:50, 3.77it/s] 37%|███▋ | 136623/371472 [10:51:16<18:03:39, 3.61it/s] 37%|███▋ | 136624/371472 [10:51:16<17:55:03, 3.64it/s] 37%|███▋ | 136625/371472 [10:51:16<18:40:15, 3.49it/s] 37%|███▋ | 136626/371472 [10:51:17<18:43:12, 3.48it/s] 37%|███▋ | 136627/371472 [10:51:17<18:05:16, 3.61it/s] 37%|███▋ | 136628/371472 [10:51:17<17:46:46, 3.67it/s] 37%|███▋ | 136629/371472 [10:51:18<17:50:46, 3.66it/s] 37%|███▋ | 136630/371472 [10:51:18<17:21:39, 3.76it/s] 37%|███▋ | 136631/371472 [10:51:18<17:24:23, 3.75it/s] 37%|███▋ | 136632/371472 [10:51:18<18:22:57, 3.55it/s] 37%|███▋ | 136633/371472 [10:51:19<17:51:08, 3.65it/s] 37%|███▋ | 136634/371472 [10:51:19<17:45:05, 3.67it/s] 37%|███▋ | 136635/371472 [10:51:19<18:05:44, 3.60it/s] 37%|███▋ | 136636/371472 [10:51:19<18:13:10, 3.58it/s] 37%|███▋ | 136637/371472 [10:51:20<20:11:36, 3.23it/s] 37%|███▋ | 136638/371472 [10:51:20<20:04:46, 3.25it/s] 37%|███▋ | 136639/371472 [10:51:20<19:17:36, 3.38it/s] 37%|███▋ | 136640/371472 [10:51:21<19:15:33, 3.39it/s] {'loss': 3.1886, 'learning_rate': 6.692559632829839e-07, 'epoch': 5.89} + 37%|███▋ | 136640/371472 [10:51:21<19:15:33, 3.39it/s] 37%|███▋ | 136641/371472 [10:51:21<18:42:21, 3.49it/s] 37%|███▋ | 136642/371472 [10:51:21<17:54:25, 3.64it/s] 37%|███▋ | 136643/371472 [10:51:21<17:55:42, 3.64it/s] 37%|███▋ | 136644/371472 [10:51:22<17:34:24, 3.71it/s] 37%|███▋ | 136645/371472 [10:51:22<17:00:06, 3.84it/s] 37%|███▋ | 136646/371472 [10:51:22<17:59:46, 3.62it/s] 37%|███▋ | 136647/371472 [10:51:23<17:15:14, 3.78it/s] 37%|███▋ | 136648/371472 [10:51:23<18:38:46, 3.50it/s] 37%|███▋ | 136649/371472 [10:51:23<18:54:02, 3.45it/s] 37%|███▋ | 136650/371472 [10:51:23<18:43:23, 3.48it/s] 37%|███▋ | 136651/371472 [10:51:24<18:51:57, 3.46it/s] 37%|███▋ | 136652/371472 [10:51:24<18:38:14, 3.50it/s] 37%|███▋ | 136653/371472 [10:51:24<17:40:38, 3.69it/s] 37%|███▋ | 136654/371472 [10:51:25<17:39:53, 3.69it/s] 37%|███▋ | 136655/371472 [10:51:25<18:23:19, 3.55it/s] 37%|███▋ | 136656/371472 [10:51:25<17:42:50, 3.68it/s] 37%|███▋ | 136657/371472 [10:51:25<17:44:49, 3.68it/s] 37%|███▋ | 136658/371472 [10:51:26<17:49:45, 3.66it/s] 37%|███▋ | 136659/371472 [10:51:26<17:13:10, 3.79it/s] 37%|███▋ | 136660/371472 [10:51:26<19:04:36, 3.42it/s] {'loss': 2.9938, 'learning_rate': 6.69207481307505e-07, 'epoch': 5.89} + 37%|███▋ | 136660/371472 [10:51:26<19:04:36, 3.42it/s] 37%|███▋ | 136661/371472 [10:51:27<19:29:30, 3.35it/s] 37%|███▋ | 136662/371472 [10:51:27<19:45:23, 3.30it/s] 37%|███▋ | 136663/371472 [10:51:27<20:49:26, 3.13it/s] 37%|███▋ | 136664/371472 [10:51:27<19:48:53, 3.29it/s] 37%|███▋ | 136665/371472 [10:51:28<20:20:47, 3.21it/s] 37%|███▋ | 136666/371472 [10:51:28<22:41:54, 2.87it/s] 37%|███▋ | 136667/371472 [10:51:29<21:11:54, 3.08it/s] 37%|███▋ | 136668/371472 [10:51:29<20:07:11, 3.24it/s] 37%|███▋ | 136669/371472 [10:51:29<20:39:21, 3.16it/s] 37%|███▋ | 136670/371472 [10:51:29<19:19:10, 3.38it/s] 37%|███▋ | 136671/371472 [10:51:30<20:23:18, 3.20it/s] 37%|███▋ | 136672/371472 [10:51:30<19:29:15, 3.35it/s] 37%|███▋ | 136673/371472 [10:51:30<18:48:03, 3.47it/s] 37%|███▋ | 136674/371472 [10:51:31<18:58:41, 3.44it/s] 37%|███▋ | 136675/371472 [10:51:31<18:44:51, 3.48it/s] 37%|███▋ | 136676/371472 [10:51:31<19:26:33, 3.35it/s] 37%|███▋ | 136677/371472 [10:51:31<18:26:20, 3.54it/s] 37%|███▋ | 136678/371472 [10:51:32<18:12:52, 3.58it/s] 37%|███▋ | 136679/371472 [10:51:32<17:39:21, 3.69it/s] 37%|███▋ | 136680/371472 [10:51:32<17:06:39, 3.81it/s] {'loss': 3.1426, 'learning_rate': 6.691589993320261e-07, 'epoch': 5.89} + 37%|███▋ | 136680/371472 [10:51:32<17:06:39, 3.81it/s] 37%|███▋ | 136681/371472 [10:51:32<18:10:43, 3.59it/s] 37%|███▋ | 136682/371472 [10:51:33<18:47:57, 3.47it/s] 37%|███▋ | 136683/371472 [10:51:33<19:07:21, 3.41it/s] 37%|███▋ | 136684/371472 [10:51:33<18:36:28, 3.50it/s] 37%|███▋ | 136685/371472 [10:51:34<20:58:22, 3.11it/s] 37%|███▋ | 136686/371472 [10:51:34<19:40:24, 3.32it/s] 37%|███▋ | 136687/371472 [10:51:34<18:50:09, 3.46it/s] 37%|███▋ | 136688/371472 [10:51:35<18:10:28, 3.59it/s] 37%|███▋ | 136689/371472 [10:51:35<19:40:45, 3.31it/s] 37%|███▋ | 136690/371472 [10:51:35<18:49:12, 3.47it/s] 37%|███▋ | 136691/371472 [10:51:35<18:10:10, 3.59it/s] 37%|███▋ | 136692/371472 [10:51:36<17:43:54, 3.68it/s] 37%|███▋ | 136693/371472 [10:51:36<20:19:56, 3.21it/s] 37%|███▋ | 136694/371472 [10:51:36<19:39:24, 3.32it/s] 37%|███▋ | 136695/371472 [10:51:37<21:00:26, 3.10it/s] 37%|███▋ | 136696/371472 [10:51:37<19:42:21, 3.31it/s] 37%|███▋ | 136697/371472 [10:51:37<19:22:18, 3.37it/s] 37%|███▋ | 136698/371472 [10:51:38<18:51:57, 3.46it/s] 37%|███▋ | 136699/371472 [10:51:38<18:35:21, 3.51it/s] 37%|███▋ | 136700/371472 [10:51:38<17:44:23, 3.68it/s] {'loss': 3.0142, 'learning_rate': 6.691105173565472e-07, 'epoch': 5.89} + 37%|███▋ | 136700/371472 [10:51:38<17:44:23, 3.68it/s] 37%|███▋ | 136701/371472 [10:51:38<19:32:57, 3.34it/s] 37%|███▋ | 136702/371472 [10:51:39<18:59:58, 3.43it/s] 37%|███▋ | 136703/371472 [10:51:39<19:06:57, 3.41it/s] 37%|███▋ | 136704/371472 [10:51:39<19:28:47, 3.35it/s] 37%|███▋ | 136705/371472 [10:51:40<19:03:38, 3.42it/s] 37%|███▋ | 136706/371472 [10:51:40<18:43:01, 3.48it/s] 37%|███▋ | 136707/371472 [10:51:40<18:04:10, 3.61it/s] 37%|███▋ | 136708/371472 [10:51:40<17:56:05, 3.64it/s] 37%|███▋ | 136709/371472 [10:51:41<18:32:28, 3.52it/s] 37%|███▋ | 136710/371472 [10:51:41<18:09:55, 3.59it/s] 37%|███▋ | 136711/371472 [10:51:41<17:31:54, 3.72it/s] 37%|███▋ | 136712/371472 [10:51:41<17:19:05, 3.77it/s] 37%|███▋ | 136713/371472 [10:51:42<18:44:24, 3.48it/s] 37%|███▋ | 136714/371472 [10:51:42<18:05:19, 3.61it/s] 37%|███▋ | 136715/371472 [10:51:42<19:07:52, 3.41it/s] 37%|███▋ | 136716/371472 [10:51:43<19:32:58, 3.34it/s] 37%|███▋ | 136717/371472 [10:51:43<18:53:16, 3.45it/s] 37%|███▋ | 136718/371472 [10:51:43<18:27:21, 3.53it/s] 37%|███▋ | 136719/371472 [10:51:43<18:10:26, 3.59it/s] 37%|███▋ | 136720/371472 [10:51:44<18:15:11, 3.57it/s] {'loss': 3.0936, 'learning_rate': 6.690620353810683e-07, 'epoch': 5.89} + 37%|███▋ | 136720/371472 [10:51:44<18:15:11, 3.57it/s] 37%|███▋ | 136721/371472 [10:51:44<19:01:55, 3.43it/s] 37%|███▋ | 136722/371472 [10:51:44<19:49:00, 3.29it/s] 37%|███▋ | 136723/371472 [10:51:45<19:29:28, 3.35it/s] 37%|███▋ | 136724/371472 [10:51:45<20:10:31, 3.23it/s] 37%|███▋ | 136725/371472 [10:51:45<20:27:14, 3.19it/s] 37%|███▋ | 136726/371472 [10:51:46<20:25:04, 3.19it/s] 37%|███▋ | 136727/371472 [10:51:46<19:20:13, 3.37it/s] 37%|███▋ | 136728/371472 [10:51:46<18:15:21, 3.57it/s] 37%|███▋ | 136729/371472 [10:51:46<17:49:48, 3.66it/s] 37%|███▋ | 136730/371472 [10:51:47<18:27:18, 3.53it/s] 37%|███▋ | 136731/371472 [10:51:47<17:59:36, 3.62it/s] 37%|███▋ | 136732/371472 [10:51:47<18:37:29, 3.50it/s] 37%|███▋ | 136733/371472 [10:51:48<17:54:06, 3.64it/s] 37%|███▋ | 136734/371472 [10:51:48<18:13:34, 3.58it/s] 37%|███▋ | 136735/371472 [10:51:48<17:54:20, 3.64it/s] 37%|███▋ | 136736/371472 [10:51:48<17:39:44, 3.69it/s] 37%|███▋ | 136737/371472 [10:51:49<17:20:05, 3.76it/s] 37%|███▋ | 136738/371472 [10:51:49<17:48:04, 3.66it/s] 37%|███▋ | 136739/371472 [10:51:49<17:59:16, 3.62it/s] 37%|███▋ | 136740/371472 [10:51:49<18:25:22, 3.54it/s] {'loss': 3.0059, 'learning_rate': 6.690135534055894e-07, 'epoch': 5.89} + 37%|███▋ | 136740/371472 [10:51:49<18:25:22, 3.54it/s] 37%|███▋ | 136741/371472 [10:51:50<18:12:04, 3.58it/s] 37%|███▋ | 136742/371472 [10:51:50<19:42:51, 3.31it/s] 37%|███▋ | 136743/371472 [10:51:50<19:38:48, 3.32it/s] 37%|███▋ | 136744/371472 [10:51:51<18:56:25, 3.44it/s] 37%|███▋ | 136745/371472 [10:51:51<19:22:57, 3.36it/s] 37%|███▋ | 136746/371472 [10:51:51<20:21:51, 3.20it/s] 37%|███▋ | 136747/371472 [10:51:52<19:35:04, 3.33it/s] 37%|███▋ | 136748/371472 [10:51:52<19:15:08, 3.39it/s] 37%|███▋ | 136749/371472 [10:51:52<18:16:46, 3.57it/s] 37%|███▋ | 136750/371472 [10:51:52<17:41:39, 3.68it/s] 37%|███▋ | 136751/371472 [10:51:53<17:35:07, 3.71it/s] 37%|███▋ | 136752/371472 [10:51:53<19:05:51, 3.41it/s] 37%|███▋ | 136753/371472 [10:51:53<18:11:13, 3.58it/s] 37%|███▋ | 136754/371472 [10:51:53<17:43:42, 3.68it/s] 37%|███▋ | 136755/371472 [10:51:54<17:15:59, 3.78it/s] 37%|███▋ | 136756/371472 [10:51:54<17:41:23, 3.69it/s] 37%|███▋ | 136757/371472 [10:51:54<17:47:51, 3.66it/s] 37%|███▋ | 136758/371472 [10:51:55<19:08:25, 3.41it/s] 37%|███▋ | 136759/371472 [10:51:55<19:14:08, 3.39it/s] 37%|███▋ | 136760/371472 [10:51:55<19:05:34, 3.41it/s] {'loss': 3.2446, 'learning_rate': 6.689650714301106e-07, 'epoch': 5.89} + 37%|███▋ | 136760/371472 [10:51:55<19:05:34, 3.41it/s] 37%|███▋ | 136761/371472 [10:51:56<18:51:40, 3.46it/s] 37%|███▋ | 136762/371472 [10:51:56<19:34:40, 3.33it/s] 37%|███▋ | 136763/371472 [10:51:56<18:34:06, 3.51it/s] 37%|███▋ | 136764/371472 [10:51:56<19:16:30, 3.38it/s] 37%|███▋ | 136765/371472 [10:51:57<18:22:15, 3.55it/s] 37%|███▋ | 136766/371472 [10:51:57<18:04:54, 3.61it/s] 37%|███▋ | 136767/371472 [10:51:57<17:36:39, 3.70it/s] 37%|███▋ | 136768/371472 [10:51:57<17:53:42, 3.64it/s] 37%|███▋ | 136769/371472 [10:51:58<17:10:17, 3.80it/s] 37%|███▋ | 136770/371472 [10:51:58<16:55:44, 3.85it/s] 37%|███▋ | 136771/371472 [10:51:58<16:49:13, 3.88it/s] 37%|███▋ | 136772/371472 [10:51:59<18:29:21, 3.53it/s] 37%|███▋ | 136773/371472 [10:51:59<18:17:48, 3.56it/s] 37%|███▋ | 136774/371472 [10:51:59<18:16:59, 3.57it/s] 37%|███▋ | 136775/371472 [10:51:59<18:23:59, 3.54it/s] 37%|███▋ | 136776/371472 [10:52:00<18:13:17, 3.58it/s] 37%|███▋ | 136777/371472 [10:52:00<17:23:41, 3.75it/s] 37%|███▋ | 136778/371472 [10:52:00<17:38:26, 3.70it/s] 37%|███▋ | 136779/371472 [10:52:00<18:22:50, 3.55it/s] 37%|███▋ | 136780/371472 [10:52:01<17:21:21, 3.76it/s] {'loss': 3.1538, 'learning_rate': 6.689165894546316e-07, 'epoch': 5.89} + 37%|███▋ | 136780/371472 [10:52:01<17:21:21, 3.76it/s] 37%|███▋ | 136781/371472 [10:52:01<17:06:48, 3.81it/s] 37%|███▋ | 136782/371472 [10:52:01<17:21:44, 3.75it/s] 37%|███▋ | 136783/371472 [10:52:02<17:23:13, 3.75it/s] 37%|███▋ | 136784/371472 [10:52:02<17:19:56, 3.76it/s] 37%|███▋ | 136785/371472 [10:52:02<17:35:43, 3.71it/s] 37%|███▋ | 136786/371472 [10:52:02<17:41:27, 3.68it/s] 37%|███▋ | 136787/371472 [10:52:03<19:20:17, 3.37it/s] 37%|███▋ | 136788/371472 [10:52:03<20:20:35, 3.20it/s] 37%|███▋ | 136789/371472 [10:52:03<19:39:07, 3.32it/s] 37%|███▋ | 136790/371472 [10:52:04<18:33:02, 3.51it/s] 37%|███▋ | 136791/371472 [10:52:04<17:54:24, 3.64it/s] 37%|███▋ | 136792/371472 [10:52:04<18:20:37, 3.55it/s] 37%|███▋ | 136793/371472 [10:52:04<17:55:56, 3.64it/s] 37%|███▋ | 136794/371472 [10:52:05<17:36:31, 3.70it/s] 37%|███▋ | 136795/371472 [10:52:05<17:09:47, 3.80it/s] 37%|███▋ | 136796/371472 [10:52:05<17:49:03, 3.66it/s] 37%|███▋ | 136797/371472 [10:52:05<17:44:47, 3.67it/s] 37%|███▋ | 136798/371472 [10:52:06<19:02:15, 3.42it/s] 37%|███▋ | 136799/371472 [10:52:06<18:25:35, 3.54it/s] 37%|███▋ | 136800/371472 [10:52:06<17:51:57, 3.65it/s] {'loss': 2.9594, 'learning_rate': 6.688681074791527e-07, 'epoch': 5.89} + 37%|███▋ | 136800/371472 [10:52:06<17:51:57, 3.65it/s] 37%|███▋ | 136801/371472 [10:52:07<17:24:10, 3.75it/s] 37%|███▋ | 136802/371472 [10:52:07<17:55:58, 3.63it/s] 37%|███▋ | 136803/371472 [10:52:07<18:38:09, 3.50it/s] 37%|███▋ | 136804/371472 [10:52:07<18:34:02, 3.51it/s] 37%|███▋ | 136805/371472 [10:52:08<18:54:15, 3.45it/s] 37%|███▋ | 136806/371472 [10:52:08<18:09:19, 3.59it/s] 37%|███▋ | 136807/371472 [10:52:08<19:35:45, 3.33it/s] 37%|███▋ | 136808/371472 [10:52:09<19:27:37, 3.35it/s] 37%|███▋ | 136809/371472 [10:52:09<18:26:51, 3.53it/s] 37%|███▋ | 136810/371472 [10:52:09<18:30:47, 3.52it/s] 37%|███▋ | 136811/371472 [10:52:09<18:05:27, 3.60it/s] 37%|███▋ | 136812/371472 [10:52:10<17:32:49, 3.71it/s] 37%|███▋ | 136813/371472 [10:52:10<18:07:24, 3.60it/s] 37%|███▋ | 136814/371472 [10:52:10<19:26:01, 3.35it/s] 37%|███▋ | 136815/371472 [10:52:11<18:50:09, 3.46it/s] 37%|███▋ | 136816/371472 [10:52:11<18:19:47, 3.56it/s] 37%|███▋ | 136817/371472 [10:52:11<19:01:43, 3.43it/s] 37%|███▋ | 136818/371472 [10:52:11<18:29:53, 3.52it/s] 37%|███▋ | 136819/371472 [10:52:12<19:39:33, 3.32it/s] 37%|███▋ | 136820/371472 [10:52:12<19:32:06, 3.34it/s] {'loss': 3.0342, 'learning_rate': 6.688196255036738e-07, 'epoch': 5.89} + 37%|███▋ | 136820/371472 [10:52:12<19:32:06, 3.34it/s] 37%|███▋ | 136821/371472 [10:52:12<19:43:14, 3.31it/s] 37%|███▋ | 136822/371472 [10:52:13<18:46:40, 3.47it/s] 37%|███▋ | 136823/371472 [10:52:13<19:28:08, 3.35it/s] 37%|███▋ | 136824/371472 [10:52:13<19:51:06, 3.28it/s] 37%|███▋ | 136825/371472 [10:52:14<19:26:54, 3.35it/s] 37%|███▋ | 136826/371472 [10:52:14<19:43:36, 3.30it/s] 37%|███▋ | 136827/371472 [10:52:14<18:39:12, 3.49it/s] 37%|███▋ | 136828/371472 [10:52:14<19:39:27, 3.32it/s] 37%|███▋ | 136829/371472 [10:52:15<18:43:45, 3.48it/s] 37%|███▋ | 136830/371472 [10:52:15<17:55:26, 3.64it/s] 37%|███▋ | 136831/371472 [10:52:15<17:01:06, 3.83it/s] 37%|███▋ | 136832/371472 [10:52:15<16:43:49, 3.90it/s] 37%|███▋ | 136833/371472 [10:52:16<18:18:49, 3.56it/s] 37%|███▋ | 136834/371472 [10:52:16<18:27:25, 3.53it/s] 37%|███▋ | 136835/371472 [10:52:16<17:52:54, 3.64it/s] 37%|███▋ | 136836/371472 [10:52:17<18:19:40, 3.56it/s] 37%|███▋ | 136837/371472 [10:52:17<18:27:40, 3.53it/s] 37%|███▋ | 136838/371472 [10:52:17<18:29:32, 3.52it/s] 37%|███▋ | 136839/371472 [10:52:17<17:49:00, 3.66it/s] 37%|███▋ | 136840/371472 [10:52:18<17:10:32, 3.79it/s] {'loss': 3.1117, 'learning_rate': 6.687711435281948e-07, 'epoch': 5.89} + 37%|███▋ | 136840/371472 [10:52:18<17:10:32, 3.79it/s] 37%|███▋ | 136841/371472 [10:52:18<17:38:43, 3.69it/s] 37%|███▋ | 136842/371472 [10:52:18<18:42:34, 3.48it/s] 37%|███▋ | 136843/371472 [10:52:19<18:24:46, 3.54it/s] 37%|███▋ | 136844/371472 [10:52:19<17:45:57, 3.67it/s] 37%|███▋ | 136845/371472 [10:52:19<18:06:25, 3.60it/s] 37%|███▋ | 136846/371472 [10:52:19<18:51:31, 3.46it/s] 37%|███▋ | 136847/371472 [10:52:20<18:14:08, 3.57it/s] 37%|███▋ | 136848/371472 [10:52:20<17:39:09, 3.69it/s] 37%|███▋ | 136849/371472 [10:52:20<18:46:14, 3.47it/s] 37%|███▋ | 136850/371472 [10:52:21<18:07:01, 3.60it/s] 37%|███▋ | 136851/371472 [10:52:21<17:36:08, 3.70it/s] 37%|███▋ | 136852/371472 [10:52:21<17:50:16, 3.65it/s] 37%|███▋ | 136853/371472 [10:52:21<17:08:40, 3.80it/s] 37%|███▋ | 136854/371472 [10:52:22<16:55:38, 3.85it/s] 37%|███▋ | 136855/371472 [10:52:22<17:31:23, 3.72it/s] 37%|███▋ | 136856/371472 [10:52:22<19:03:39, 3.42it/s] 37%|███▋ | 136857/371472 [10:52:22<18:12:32, 3.58it/s] 37%|███▋ | 136858/371472 [10:52:23<18:37:05, 3.50it/s] 37%|███▋ | 136859/371472 [10:52:23<17:59:01, 3.62it/s] 37%|███▋ | 136860/371472 [10:52:23<18:31:09, 3.52it/s] {'loss': 2.9883, 'learning_rate': 6.68722661552716e-07, 'epoch': 5.89} + 37%|███▋ | 136860/371472 [10:52:23<18:31:09, 3.52it/s] 37%|███▋ | 136861/371472 [10:52:24<18:49:05, 3.46it/s] 37%|███▋ | 136862/371472 [10:52:24<18:57:49, 3.44it/s] 37%|███▋ | 136863/371472 [10:52:24<18:32:14, 3.52it/s] 37%|███▋ | 136864/371472 [10:52:24<18:21:18, 3.55it/s] 37%|███▋ | 136865/371472 [10:52:25<17:49:42, 3.66it/s] 37%|███▋ | 136866/371472 [10:52:25<17:38:51, 3.69it/s] 37%|███▋ | 136867/371472 [10:52:25<17:21:03, 3.76it/s] 37%|███▋ | 136868/371472 [10:52:25<17:03:23, 3.82it/s] 37%|███▋ | 136869/371472 [10:52:26<17:22:04, 3.75it/s] 37%|███▋ | 136870/371472 [10:52:26<17:58:25, 3.63it/s] 37%|███▋ | 136871/371472 [10:52:26<17:45:05, 3.67it/s] 37%|███▋ | 136872/371472 [10:52:27<17:52:31, 3.65it/s] 37%|███▋ | 136873/371472 [10:52:27<18:11:15, 3.58it/s] 37%|███▋ | 136874/371472 [10:52:27<17:52:49, 3.64it/s] 37%|███▋ | 136875/371472 [10:52:27<19:41:16, 3.31it/s] 37%|███▋ | 136876/371472 [10:52:28<18:40:37, 3.49it/s] 37%|███▋ | 136877/371472 [10:52:28<18:48:51, 3.46it/s] 37%|███▋ | 136878/371472 [10:52:28<19:19:43, 3.37it/s] 37%|███▋ | 136879/371472 [10:52:29<18:17:08, 3.56it/s] 37%|███▋ | 136880/371472 [10:52:29<19:29:47, 3.34it/s] {'loss': 3.039, 'learning_rate': 6.686741795772372e-07, 'epoch': 5.9} + 37%|███▋ | 136880/371472 [10:52:29<19:29:47, 3.34it/s] 37%|███▋ | 136881/371472 [10:52:29<19:12:43, 3.39it/s] 37%|███▋ | 136882/371472 [10:52:30<19:15:11, 3.38it/s] 37%|███▋ | 136883/371472 [10:52:30<19:02:31, 3.42it/s] 37%|███▋ | 136884/371472 [10:52:30<18:24:50, 3.54it/s] 37%|███▋ | 136885/371472 [10:52:30<17:23:38, 3.75it/s] 37%|███▋ | 136886/371472 [10:52:31<17:05:16, 3.81it/s] 37%|███▋ | 136887/371472 [10:52:31<17:19:20, 3.76it/s] 37%|███▋ | 136888/371472 [10:52:31<17:21:29, 3.75it/s] 37%|███▋ | 136889/371472 [10:52:31<17:34:28, 3.71it/s] 37%|███▋ | 136890/371472 [10:52:32<17:03:25, 3.82it/s] 37%|███▋ | 136891/371472 [10:52:32<17:53:55, 3.64it/s] 37%|███▋ | 136892/371472 [10:52:32<17:24:43, 3.74it/s] 37%|███▋ | 136893/371472 [10:52:32<17:55:27, 3.64it/s] 37%|███▋ | 136894/371472 [10:52:33<17:04:38, 3.82it/s] 37%|███▋ | 136895/371472 [10:52:33<17:41:57, 3.68it/s] 37%|███▋ | 136896/371472 [10:52:33<18:24:48, 3.54it/s] 37%|███▋ | 136897/371472 [10:52:34<18:13:27, 3.58it/s] 37%|███▋ | 136898/371472 [10:52:34<19:37:45, 3.32it/s] 37%|███▋ | 136899/371472 [10:52:34<18:25:57, 3.53it/s] 37%|███▋ | 136900/371472 [10:52:34<17:42:33, 3.68it/s] {'loss': 3.1795, 'learning_rate': 6.686256976017583e-07, 'epoch': 5.9} + 37%|███▋ | 136900/371472 [10:52:34<17:42:33, 3.68it/s] 37%|███▋ | 136901/371472 [10:52:35<17:45:22, 3.67it/s] 37%|███▋ | 136902/371472 [10:52:35<18:20:38, 3.55it/s] 37%|███▋ | 136903/371472 [10:52:35<17:58:18, 3.63it/s] 37%|███▋ | 136904/371472 [10:52:35<17:33:21, 3.71it/s] 37%|███▋ | 136905/371472 [10:52:36<17:10:40, 3.79it/s] 37%|███▋ | 136906/371472 [10:52:36<17:24:37, 3.74it/s] 37%|███▋ | 136907/371472 [10:52:36<18:58:48, 3.43it/s] 37%|███▋ | 136908/371472 [10:52:37<18:21:09, 3.55it/s] 37%|███▋ | 136909/371472 [10:52:37<18:38:45, 3.49it/s] 37%|███▋ | 136910/371472 [10:52:37<18:29:24, 3.52it/s] 37%|███▋ | 136911/371472 [10:52:37<17:53:22, 3.64it/s] 37%|███▋ | 136912/371472 [10:52:38<18:07:25, 3.60it/s] 37%|███▋ | 136913/371472 [10:52:38<17:37:31, 3.70it/s] 37%|███▋ | 136914/371472 [10:52:38<19:41:40, 3.31it/s] 37%|███▋ | 136915/371472 [10:52:39<18:58:47, 3.43it/s] 37%|███▋ | 136916/371472 [10:52:39<18:01:08, 3.62it/s] 37%|███▋ | 136917/371472 [10:52:39<17:13:19, 3.78it/s] 37%|███▋ | 136918/371472 [10:52:39<17:47:28, 3.66it/s] 37%|███▋ | 136919/371472 [10:52:40<17:26:06, 3.74it/s] 37%|███▋ | 136920/371472 [10:52:40<17:37:22, 3.70it/s] {'loss': 3.0312, 'learning_rate': 6.685772156262793e-07, 'epoch': 5.9} + 37%|███▋ | 136920/371472 [10:52:40<17:37:22, 3.70it/s] 37%|███▋ | 136921/371472 [10:52:40<18:50:11, 3.46it/s] 37%|███▋ | 136922/371472 [10:52:41<19:23:29, 3.36it/s] 37%|███▋ | 136923/371472 [10:52:41<18:51:27, 3.45it/s] 37%|███▋ | 136924/371472 [10:52:41<19:02:32, 3.42it/s] 37%|███▋ | 136925/371472 [10:52:41<19:38:10, 3.32it/s] 37%|███▋ | 136926/371472 [10:52:42<19:40:24, 3.31it/s] 37%|███▋ | 136927/371472 [10:52:42<19:32:20, 3.33it/s] 37%|███▋ | 136928/371472 [10:52:42<19:44:24, 3.30it/s] 37%|███▋ | 136929/371472 [10:52:43<18:54:42, 3.44it/s] 37%|███▋ | 136930/371472 [10:52:43<18:17:50, 3.56it/s] 37%|███▋ | 136931/371472 [10:52:43<17:48:58, 3.66it/s] 37%|███▋ | 136932/371472 [10:52:43<18:25:54, 3.53it/s] 37%|███▋ | 136933/371472 [10:52:44<17:41:58, 3.68it/s] 37%|███▋ | 136934/371472 [10:52:44<17:58:32, 3.62it/s] 37%|███▋ | 136935/371472 [10:52:44<18:03:29, 3.61it/s] 37%|███▋ | 136936/371472 [10:52:45<17:53:30, 3.64it/s] 37%|███▋ | 136937/371472 [10:52:45<19:30:20, 3.34it/s] 37%|███▋ | 136938/371472 [10:52:45<19:37:29, 3.32it/s] 37%|███▋ | 136939/371472 [10:52:46<20:05:17, 3.24it/s] 37%|███▋ | 136940/371472 [10:52:46<20:42:05, 3.15it/s] {'loss': 3.0345, 'learning_rate': 6.685287336508004e-07, 'epoch': 5.9} + 37%|███▋ | 136940/371472 [10:52:46<20:42:05, 3.15it/s] 37%|███▋ | 136941/371472 [10:52:46<20:09:19, 3.23it/s] 37%|███▋ | 136942/371472 [10:52:46<18:49:35, 3.46it/s] 37%|███▋ | 136943/371472 [10:52:47<18:33:19, 3.51it/s] 37%|███▋ | 136944/371472 [10:52:47<18:22:24, 3.55it/s] 37%|███▋ | 136945/371472 [10:52:47<19:39:55, 3.31it/s] 37%|███▋ | 136946/371472 [10:52:48<18:40:20, 3.49it/s] 37%|███▋ | 136947/371472 [10:52:48<17:49:35, 3.65it/s] 37%|███▋ | 136948/371472 [10:52:48<17:27:26, 3.73it/s] 37%|███▋ | 136949/371472 [10:52:48<17:29:27, 3.72it/s] 37%|███▋ | 136950/371472 [10:52:49<18:14:24, 3.57it/s] 37%|███▋ | 136951/371472 [10:52:49<19:04:18, 3.42it/s] 37%|███▋ | 136952/371472 [10:52:49<18:16:11, 3.57it/s] 37%|███▋ | 136953/371472 [10:52:50<19:04:45, 3.41it/s] 37%|███▋ | 136954/371472 [10:52:50<19:23:56, 3.36it/s] 37%|███▋ | 136955/371472 [10:52:50<18:25:47, 3.53it/s] 37%|███▋ | 136956/371472 [10:52:50<20:26:32, 3.19it/s] 37%|███▋ | 136957/371472 [10:52:51<19:58:14, 3.26it/s] 37%|███▋ | 136958/371472 [10:52:51<18:29:49, 3.52it/s] 37%|███▋ | 136959/371472 [10:52:51<18:09:52, 3.59it/s] 37%|███▋ | 136960/371472 [10:52:52<18:28:29, 3.53it/s] {'loss': 3.0789, 'learning_rate': 6.684802516753216e-07, 'epoch': 5.9} + 37%|███▋ | 136960/371472 [10:52:52<18:28:29, 3.53it/s] 37%|███▋ | 136961/371472 [10:52:52<18:06:09, 3.60it/s] 37%|███▋ | 136962/371472 [10:52:52<18:20:20, 3.55it/s] 37%|███▋ | 136963/371472 [10:52:52<18:32:16, 3.51it/s] 37%|███▋ | 136964/371472 [10:52:53<18:04:42, 3.60it/s] 37%|███▋ | 136965/371472 [10:52:53<17:57:14, 3.63it/s] 37%|███▋ | 136966/371472 [10:52:53<17:33:47, 3.71it/s] 37%|███▋ | 136967/371472 [10:52:53<18:25:24, 3.54it/s] 37%|███▋ | 136968/371472 [10:52:54<18:30:56, 3.52it/s] 37%|███▋ | 136969/371472 [10:52:54<18:35:41, 3.50it/s] 37%|███▋ | 136970/371472 [10:52:54<18:13:03, 3.58it/s] 37%|███▋ | 136971/371472 [10:52:55<19:19:07, 3.37it/s] 37%|███▋ | 136972/371472 [10:52:55<19:14:26, 3.39it/s] 37%|███▋ | 136973/371472 [10:52:55<19:00:19, 3.43it/s] 37%|███▋ | 136974/371472 [10:52:56<18:21:17, 3.55it/s] 37%|███▋ | 136975/371472 [10:52:56<18:22:40, 3.54it/s] 37%|███▋ | 136976/371472 [10:52:56<19:14:33, 3.39it/s] 37%|███▋ | 136977/371472 [10:52:56<18:32:05, 3.51it/s] 37%|███▋ | 136978/371472 [10:52:57<19:23:34, 3.36it/s] 37%|███▋ | 136979/371472 [10:52:57<19:33:48, 3.33it/s] 37%|███▋ | 136980/371472 [10:52:57<18:48:54, 3.46it/s] {'loss': 3.1513, 'learning_rate': 6.684317696998427e-07, 'epoch': 5.9} + 37%|███▋ | 136980/371472 [10:52:57<18:48:54, 3.46it/s] 37%|███▋ | 136981/371472 [10:52:58<19:54:17, 3.27it/s] 37%|███▋ | 136982/371472 [10:52:58<20:19:16, 3.21it/s] 37%|███▋ | 136983/371472 [10:52:58<20:06:20, 3.24it/s] 37%|███▋ | 136984/371472 [10:52:59<19:13:55, 3.39it/s] 37%|███▋ | 136985/371472 [10:52:59<19:07:00, 3.41it/s] 37%|███▋ | 136986/371472 [10:52:59<18:27:42, 3.53it/s] 37%|███▋ | 136987/371472 [10:52:59<18:28:12, 3.53it/s] 37%|███▋ | 136988/371472 [10:53:00<18:27:06, 3.53it/s] 37%|███▋ | 136989/371472 [10:53:00<18:46:29, 3.47it/s] 37%|███▋ | 136990/371472 [10:53:00<18:54:46, 3.44it/s] 37%|███▋ | 136991/371472 [10:53:00<18:33:45, 3.51it/s] 37%|███▋ | 136992/371472 [10:53:01<18:25:26, 3.54it/s] 37%|███▋ | 136993/371472 [10:53:01<18:19:56, 3.55it/s] 37%|███▋ | 136994/371472 [10:53:01<18:20:53, 3.55it/s] 37%|███▋ | 136995/371472 [10:53:02<18:49:30, 3.46it/s] 37%|███▋ | 136996/371472 [10:53:02<18:42:37, 3.48it/s] 37%|███▋ | 136997/371472 [10:53:02<18:47:43, 3.47it/s] 37%|███▋ | 136998/371472 [10:53:03<19:00:05, 3.43it/s] 37%|███▋ | 136999/371472 [10:53:03<18:23:03, 3.54it/s] 37%|███▋ | 137000/371472 [10:53:03<18:02:32, 3.61it/s] {'loss': 2.986, 'learning_rate': 6.683832877243638e-07, 'epoch': 5.9} + 37%|███▋ | 137000/371472 [10:53:03<18:02:32, 3.61it/s] 37%|███▋ | 137001/371472 [10:53:03<18:25:11, 3.54it/s] 37%|███▋ | 137002/371472 [10:53:04<18:58:27, 3.43it/s] 37%|███▋ | 137003/371472 [10:53:04<18:53:04, 3.45it/s] 37%|███▋ | 137004/371472 [10:53:04<17:53:02, 3.64it/s] 37%|███▋ | 137005/371472 [10:53:04<18:19:57, 3.55it/s] 37%|███▋ | 137006/371472 [10:53:05<19:01:28, 3.42it/s] 37%|███▋ | 137007/371472 [10:53:05<18:55:55, 3.44it/s] 37%|███▋ | 137008/371472 [10:53:05<18:55:31, 3.44it/s] 37%|███▋ | 137009/371472 [10:53:06<20:00:56, 3.25it/s] 37%|███▋ | 137010/371472 [10:53:06<20:23:27, 3.19it/s] 37%|███▋ | 137011/371472 [10:53:06<19:53:58, 3.27it/s] 37%|███▋ | 137012/371472 [10:53:07<19:10:59, 3.40it/s] 37%|███▋ | 137013/371472 [10:53:07<19:45:27, 3.30it/s] 37%|███▋ | 137014/371472 [10:53:07<19:00:13, 3.43it/s] 37%|███▋ | 137015/371472 [10:53:07<18:37:08, 3.50it/s] 37%|███▋ | 137016/371472 [10:53:08<18:52:07, 3.45it/s] 37%|███▋ | 137017/371472 [10:53:08<18:37:41, 3.50it/s] 37%|███▋ | 137018/371472 [10:53:08<19:49:35, 3.28it/s] 37%|███▋ | 137019/371472 [10:53:09<19:24:16, 3.36it/s] 37%|███▋ | 137020/371472 [10:53:09<18:43:01, 3.48it/s] {'loss': 3.0019, 'learning_rate': 6.683348057488849e-07, 'epoch': 5.9} + 37%|███▋ | 137020/371472 [10:53:09<18:43:01, 3.48it/s] 37%|███▋ | 137021/371472 [10:53:09<17:56:25, 3.63it/s] 37%|███▋ | 137022/371472 [10:53:09<18:04:43, 3.60it/s] 37%|███▋ | 137023/371472 [10:53:10<18:11:01, 3.58it/s] 37%|███▋ | 137024/371472 [10:53:10<18:37:18, 3.50it/s] 37%|███▋ | 137025/371472 [10:53:10<18:23:34, 3.54it/s] 37%|███▋ | 137026/371472 [10:53:11<18:53:19, 3.45it/s] 37%|███▋ | 137027/371472 [10:53:11<18:14:50, 3.57it/s] 37%|███▋ | 137028/371472 [10:53:11<17:28:07, 3.73it/s] 37%|███▋ | 137029/371472 [10:53:11<17:02:08, 3.82it/s] 37%|███▋ | 137030/371472 [10:53:12<17:22:49, 3.75it/s] 37%|███▋ | 137031/371472 [10:53:12<17:48:05, 3.66it/s] 37%|███▋ | 137032/371472 [10:53:12<19:58:05, 3.26it/s] 37%|███▋ | 137033/371472 [10:53:13<20:02:11, 3.25it/s] 37%|███▋ | 137034/371472 [10:53:13<19:51:55, 3.28it/s] 37%|███▋ | 137035/371472 [10:53:13<22:28:35, 2.90it/s] 37%|███▋ | 137036/371472 [10:53:14<21:21:58, 3.05it/s] 37%|███▋ | 137037/371472 [10:53:14<20:00:16, 3.26it/s] 37%|███▋ | 137038/371472 [10:53:14<19:58:51, 3.26it/s] 37%|███▋ | 137039/371472 [10:53:15<21:20:52, 3.05it/s] 37%|███▋ | 137040/371472 [10:53:15<20:29:10, 3.18it/s] {'loss': 3.088, 'learning_rate': 6.682863237734059e-07, 'epoch': 5.9} + 37%|███▋ | 137040/371472 [10:53:15<20:29:10, 3.18it/s] 37%|███▋ | 137041/371472 [10:53:15<22:53:25, 2.84it/s] 37%|███▋ | 137042/371472 [10:53:16<20:49:23, 3.13it/s] 37%|███▋ | 137043/371472 [10:53:16<20:33:50, 3.17it/s] 37%|███▋ | 137044/371472 [10:53:16<19:51:26, 3.28it/s] 37%|███▋ | 137045/371472 [10:53:16<20:02:57, 3.25it/s] 37%|███▋ | 137046/371472 [10:53:17<19:48:13, 3.29it/s] 37%|███▋ | 137047/371472 [10:53:17<18:47:40, 3.46it/s] 37%|███▋ | 137048/371472 [10:53:17<19:19:18, 3.37it/s] 37%|███▋ | 137049/371472 [10:53:18<18:50:22, 3.46it/s] 37%|███▋ | 137050/371472 [10:53:18<18:35:44, 3.50it/s] 37%|███▋ | 137051/371472 [10:53:18<18:47:08, 3.47it/s] 37%|███▋ | 137052/371472 [10:53:18<19:00:45, 3.42it/s] 37%|███▋ | 137053/371472 [10:53:19<18:33:04, 3.51it/s] 37%|███▋ | 137054/371472 [10:53:19<19:12:02, 3.39it/s] 37%|███▋ | 137055/371472 [10:53:19<18:25:42, 3.53it/s] 37%|███▋ | 137056/371472 [10:53:20<18:12:40, 3.58it/s] 37%|███▋ | 137057/371472 [10:53:20<19:28:10, 3.34it/s] 37%|███▋ | 137058/371472 [10:53:20<20:17:21, 3.21it/s] 37%|███▋ | 137059/371472 [10:53:21<19:21:25, 3.36it/s] 37%|███▋ | 137060/371472 [10:53:21<19:11:30, 3.39it/s] {'loss': 3.0026, 'learning_rate': 6.682378417979271e-07, 'epoch': 5.9} + 37%|███▋ | 137060/371472 [10:53:21<19:11:30, 3.39it/s] 37%|███▋ | 137061/371472 [10:53:21<18:43:25, 3.48it/s] 37%|███▋ | 137062/371472 [10:53:21<18:04:01, 3.60it/s] 37%|███▋ | 137063/371472 [10:53:22<18:04:58, 3.60it/s] 37%|███▋ | 137064/371472 [10:53:22<18:10:56, 3.58it/s] 37%|███▋ | 137065/371472 [10:53:22<17:55:05, 3.63it/s] 37%|███▋ | 137066/371472 [10:53:22<17:25:57, 3.74it/s] 37%|███▋ | 137067/371472 [10:53:23<17:59:31, 3.62it/s] 37%|███▋ | 137068/371472 [10:53:23<18:16:37, 3.56it/s] 37%|███▋ | 137069/371472 [10:53:23<17:50:15, 3.65it/s] 37%|███▋ | 137070/371472 [10:53:23<16:54:54, 3.85it/s] 37%|███▋ | 137071/371472 [10:53:24<16:25:08, 3.97it/s] 37%|███▋ | 137072/371472 [10:53:24<16:04:52, 4.05it/s] 37%|███▋ | 137073/371472 [10:53:24<17:02:58, 3.82it/s] 37%|███▋ | 137074/371472 [10:53:24<16:46:20, 3.88it/s] 37%|███▋ | 137075/371472 [10:53:25<16:59:21, 3.83it/s] 37%|███▋ | 137076/371472 [10:53:25<17:00:42, 3.83it/s] 37%|███▋ | 137077/371472 [10:53:25<16:39:15, 3.91it/s] 37%|███▋ | 137078/371472 [10:53:26<16:36:22, 3.92it/s] 37%|███▋ | 137079/371472 [10:53:26<16:56:30, 3.84it/s] 37%|███▋ | 137080/371472 [10:53:26<16:48:31, 3.87it/s] {'loss': 3.2295, 'learning_rate': 6.681893598224482e-07, 'epoch': 5.9} + 37%|███▋ | 137080/371472 [10:53:26<16:48:31, 3.87it/s] 37%|███▋ | 137081/371472 [10:53:26<19:53:49, 3.27it/s] 37%|███▋ | 137082/371472 [10:53:27<18:52:44, 3.45it/s] 37%|███▋ | 137083/371472 [10:53:27<18:09:18, 3.59it/s] 37%|███▋ | 137084/371472 [10:53:27<19:53:18, 3.27it/s] 37%|███▋ | 137085/371472 [10:53:28<20:32:51, 3.17it/s] 37%|███▋ | 137086/371472 [10:53:28<19:29:18, 3.34it/s] 37%|███▋ | 137087/371472 [10:53:28<20:11:23, 3.22it/s] 37%|███▋ | 137088/371472 [10:53:29<19:04:33, 3.41it/s] 37%|███▋ | 137089/371472 [10:53:29<18:00:39, 3.61it/s] 37%|███▋ | 137090/371472 [10:53:29<18:25:44, 3.53it/s] 37%|███▋ | 137091/371472 [10:53:29<17:41:15, 3.68it/s] 37%|███▋ | 137092/371472 [10:53:30<17:55:11, 3.63it/s] 37%|███▋ | 137093/371472 [10:53:30<18:42:24, 3.48it/s] 37%|███▋ | 137094/371472 [10:53:30<19:06:12, 3.41it/s] 37%|███▋ | 137095/371472 [10:53:31<18:55:49, 3.44it/s] 37%|███▋ | 137096/371472 [10:53:31<18:00:36, 3.61it/s] 37%|███▋ | 137097/371472 [10:53:31<17:57:40, 3.62it/s] 37%|███▋ | 137098/371472 [10:53:31<17:34:03, 3.71it/s] 37%|███▋ | 137099/371472 [10:53:32<17:17:25, 3.77it/s] 37%|███▋ | 137100/371472 [10:53:32<17:40:21, 3.68it/s] {'loss': 3.1641, 'learning_rate': 6.681408778469693e-07, 'epoch': 5.91} + 37%|███▋ | 137100/371472 [10:53:32<17:40:21, 3.68it/s] 37%|███▋ | 137101/371472 [10:53:32<17:53:51, 3.64it/s] 37%|███▋ | 137102/371472 [10:53:32<17:52:48, 3.64it/s] 37%|███▋ | 137103/371472 [10:53:33<17:08:32, 3.80it/s] 37%|███▋ | 137104/371472 [10:53:33<18:40:57, 3.48it/s] 37%|███▋ | 137105/371472 [10:53:33<18:26:02, 3.53it/s] 37%|███▋ | 137106/371472 [10:53:34<19:51:55, 3.28it/s] 37%|███▋ | 137107/371472 [10:53:34<18:48:02, 3.46it/s] 37%|███▋ | 137108/371472 [10:53:34<18:38:35, 3.49it/s] 37%|███▋ | 137109/371472 [10:53:34<18:23:00, 3.54it/s] 37%|███▋ | 137110/371472 [10:53:35<19:27:14, 3.35it/s] 37%|███▋ | 137111/371472 [10:53:35<19:37:31, 3.32it/s] 37%|███▋ | 137112/371472 [10:53:35<18:13:21, 3.57it/s] 37%|███▋ | 137113/371472 [10:53:36<17:32:29, 3.71it/s] 37%|███▋ | 137114/371472 [10:53:36<16:54:27, 3.85it/s] 37%|███▋ | 137115/371472 [10:53:36<16:29:47, 3.95it/s] 37%|███▋ | 137116/371472 [10:53:36<16:32:41, 3.93it/s] 37%|███▋ | 137117/371472 [10:53:37<17:09:05, 3.80it/s] 37%|███▋ | 137118/371472 [10:53:37<16:49:42, 3.87it/s] 37%|███▋ | 137119/371472 [10:53:37<16:39:43, 3.91it/s] 37%|███▋ | 137120/371472 [10:53:37<16:52:14, 3.86it/s] {'loss': 3.2013, 'learning_rate': 6.680923958714904e-07, 'epoch': 5.91} + 37%|███▋ | 137120/371472 [10:53:37<16:52:14, 3.86it/s] 37%|███▋ | 137121/371472 [10:53:38<18:56:42, 3.44it/s] 37%|███▋ | 137122/371472 [10:53:38<18:04:54, 3.60it/s] 37%|███▋ | 137123/371472 [10:53:38<17:48:01, 3.66it/s] 37%|███▋ | 137124/371472 [10:53:39<19:35:17, 3.32it/s] 37%|███▋ | 137125/371472 [10:53:39<19:10:26, 3.40it/s] 37%|███▋ | 137126/371472 [10:53:39<18:26:44, 3.53it/s] 37%|███▋ | 137127/371472 [10:53:39<17:46:50, 3.66it/s] 37%|███▋ | 137128/371472 [10:53:40<18:40:30, 3.49it/s] 37%|███▋ | 137129/371472 [10:53:40<17:52:43, 3.64it/s] 37%|███▋ | 137130/371472 [10:53:40<17:32:02, 3.71it/s] 37%|███▋ | 137131/371472 [10:53:40<16:47:24, 3.88it/s] 37%|███▋ | 137132/371472 [10:53:41<16:45:47, 3.88it/s] 37%|███▋ | 137133/371472 [10:53:41<17:52:25, 3.64it/s] 37%|███▋ | 137134/371472 [10:53:41<19:08:48, 3.40it/s] 37%|███▋ | 137135/371472 [10:53:42<19:05:34, 3.41it/s] 37%|███▋ | 137136/371472 [10:53:42<18:20:16, 3.55it/s] 37%|███▋ | 137137/371472 [10:53:42<17:22:03, 3.75it/s] 37%|███▋ | 137138/371472 [10:53:42<17:04:34, 3.81it/s] 37%|███▋ | 137139/371472 [10:53:43<16:48:23, 3.87it/s] 37%|███▋ | 137140/371472 [10:53:43<16:39:41, 3.91it/s] {'loss': 3.0945, 'learning_rate': 6.680439138960116e-07, 'epoch': 5.91} + 37%|███▋ | 137140/371472 [10:53:43<16:39:41, 3.91it/s] 37%|███▋ | 137141/371472 [10:53:43<17:01:57, 3.82it/s] 37%|███▋ | 137142/371472 [10:53:43<17:35:00, 3.70it/s] 37%|███▋ | 137143/371472 [10:53:44<19:32:22, 3.33it/s] 37%|███▋ | 137144/371472 [10:53:44<18:32:49, 3.51it/s] 37%|███▋ | 137145/371472 [10:53:44<18:07:02, 3.59it/s] 37%|███▋ | 137146/371472 [10:53:45<17:37:57, 3.69it/s] 37%|███▋ | 137147/371472 [10:53:45<18:50:28, 3.45it/s] 37%|███▋ | 137148/371472 [10:53:45<18:59:36, 3.43it/s] 37%|███▋ | 137149/371472 [10:53:45<18:43:22, 3.48it/s] 37%|███▋ | 137150/371472 [10:53:46<17:59:51, 3.62it/s] 37%|███▋ | 137151/371472 [10:53:46<17:50:26, 3.65it/s] 37%|███▋ | 137152/371472 [10:53:46<17:38:41, 3.69it/s] 37%|███▋ | 137153/371472 [10:53:46<17:37:03, 3.69it/s] 37%|███▋ | 137154/371472 [10:53:47<19:53:24, 3.27it/s] 37%|███▋ | 137155/371472 [10:53:47<18:33:47, 3.51it/s] 37%|███▋ | 137156/371472 [10:53:47<17:57:18, 3.63it/s] 37%|███▋ | 137157/371472 [10:53:48<19:55:34, 3.27it/s] 37%|███▋ | 137158/371472 [10:53:48<20:27:06, 3.18it/s] 37%|███▋ | 137159/371472 [10:53:48<20:14:13, 3.22it/s] 37%|███▋ | 137160/371472 [10:53:49<19:13:51, 3.38it/s] {'loss': 3.1501, 'learning_rate': 6.679954319205326e-07, 'epoch': 5.91} + 37%|███▋ | 137160/371472 [10:53:49<19:13:51, 3.38it/s] 37%|███▋ | 137161/371472 [10:53:49<18:23:10, 3.54it/s] 37%|███▋ | 137162/371472 [10:53:49<18:25:09, 3.53it/s] 37%|███▋ | 137163/371472 [10:53:49<18:59:43, 3.43it/s] 37%|███▋ | 137164/371472 [10:53:50<18:35:36, 3.50it/s] 37%|███▋ | 137165/371472 [10:53:50<18:08:52, 3.59it/s] 37%|███▋ | 137166/371472 [10:53:50<18:02:56, 3.61it/s] 37%|███▋ | 137167/371472 [10:53:51<18:09:20, 3.58it/s] 37%|███▋ | 137168/371472 [10:53:51<18:06:07, 3.60it/s] 37%|███▋ | 137169/371472 [10:53:51<18:09:45, 3.58it/s] 37%|███▋ | 137170/371472 [10:53:51<17:38:02, 3.69it/s] 37%|███▋ | 137171/371472 [10:53:52<18:18:53, 3.55it/s] 37%|███▋ | 137172/371472 [10:53:52<18:09:14, 3.59it/s] 37%|███▋ | 137173/371472 [10:53:52<17:39:08, 3.69it/s] 37%|███▋ | 137174/371472 [10:53:53<18:19:44, 3.55it/s] 37%|███▋ | 137175/371472 [10:53:53<18:00:08, 3.62it/s] 37%|███▋ | 137176/371472 [10:53:53<17:43:08, 3.67it/s] 37%|███▋ | 137177/371472 [10:53:53<18:40:15, 3.49it/s] 37%|███▋ | 137178/371472 [10:53:54<17:57:47, 3.62it/s] 37%|███▋ | 137179/371472 [10:53:54<17:25:11, 3.74it/s] 37%|███▋ | 137180/371472 [10:53:54<17:35:24, 3.70it/s] {'loss': 3.1245, 'learning_rate': 6.679469499450537e-07, 'epoch': 5.91} + 37%|███▋ | 137180/371472 [10:53:54<17:35:24, 3.70it/s] 37%|███▋ | 137181/371472 [10:53:54<18:09:30, 3.58it/s] 37%|███▋ | 137182/371472 [10:53:55<17:25:03, 3.74it/s] 37%|███▋ | 137183/371472 [10:53:55<17:24:02, 3.74it/s] 37%|███▋ | 137184/371472 [10:53:55<17:57:14, 3.62it/s] 37%|███▋ | 137185/371472 [10:53:56<19:18:34, 3.37it/s] 37%|███▋ | 137186/371472 [10:53:56<19:10:39, 3.39it/s] 37%|███▋ | 137187/371472 [10:53:56<18:07:53, 3.59it/s] 37%|███▋ | 137188/371472 [10:53:56<18:05:45, 3.60it/s] 37%|███▋ | 137189/371472 [10:53:57<17:48:52, 3.65it/s] 37%|███▋ | 137190/371472 [10:53:57<18:04:24, 3.60it/s] 37%|███▋ | 137191/371472 [10:53:57<17:35:16, 3.70it/s] 37%|███▋ | 137192/371472 [10:53:58<19:14:49, 3.38it/s] 37%|███▋ | 137193/371472 [10:53:58<18:38:09, 3.49it/s] 37%|███▋ | 137194/371472 [10:53:58<20:30:17, 3.17it/s] 37%|███▋ | 137195/371472 [10:53:58<19:48:17, 3.29it/s] 37%|███▋ | 137196/371472 [10:53:59<19:29:24, 3.34it/s] 37%|███▋ | 137197/371472 [10:53:59<18:45:59, 3.47it/s] 37%|███▋ | 137198/371472 [10:53:59<18:46:43, 3.47it/s] 37%|███▋ | 137199/371472 [10:54:00<18:47:13, 3.46it/s] 37%|███▋ | 137200/371472 [10:54:00<18:58:08, 3.43it/s] {'loss': 2.8896, 'learning_rate': 6.678984679695748e-07, 'epoch': 5.91} + 37%|███▋ | 137200/371472 [10:54:00<18:58:08, 3.43it/s] 37%|███▋ | 137201/371472 [10:54:00<19:13:05, 3.39it/s] 37%|███▋ | 137202/371472 [10:54:01<20:33:03, 3.17it/s] 37%|███▋ | 137203/371472 [10:54:01<19:27:33, 3.34it/s] 37%|███▋ | 137204/371472 [10:54:01<19:23:47, 3.35it/s] 37%|███▋ | 137205/371472 [10:54:01<19:06:45, 3.40it/s] 37%|███▋ | 137206/371472 [10:54:02<18:10:56, 3.58it/s] 37%|███▋ | 137207/371472 [10:54:02<18:00:06, 3.61it/s] 37%|███▋ | 137208/371472 [10:54:02<20:52:27, 3.12it/s] 37%|███▋ | 137209/371472 [10:54:03<21:08:33, 3.08it/s] 37%|███▋ | 137210/371472 [10:54:03<20:55:30, 3.11it/s] 37%|███▋ | 137211/371472 [10:54:03<19:09:44, 3.40it/s] 37%|███▋ | 137212/371472 [10:54:04<18:46:46, 3.47it/s] 37%|███▋ | 137213/371472 [10:54:04<18:27:38, 3.52it/s] 37%|███▋ | 137214/371472 [10:54:04<17:41:31, 3.68it/s] 37%|███▋ | 137215/371472 [10:54:04<19:01:10, 3.42it/s] 37%|███▋ | 137216/371472 [10:54:05<20:12:22, 3.22it/s] 37%|███▋ | 137217/371472 [10:54:05<19:16:41, 3.38it/s] 37%|███▋ | 137218/371472 [10:54:05<20:02:44, 3.25it/s] 37%|███▋ | 137219/371472 [10:54:06<21:23:50, 3.04it/s] 37%|███▋ | 137220/371472 [10:54:06<19:54:56, 3.27it/s] {'loss': 2.9712, 'learning_rate': 6.67849985994096e-07, 'epoch': 5.91} + 37%|███▋ | 137220/371472 [10:54:06<19:54:56, 3.27it/s] 37%|███▋ | 137221/371472 [10:54:06<18:39:01, 3.49it/s] 37%|███▋ | 137222/371472 [10:54:07<19:22:55, 3.36it/s] 37%|███▋ | 137223/371472 [10:54:07<19:36:38, 3.32it/s] 37%|███▋ | 137224/371472 [10:54:07<19:33:15, 3.33it/s] 37%|███▋ | 137225/371472 [10:54:07<19:07:36, 3.40it/s] 37%|███▋ | 137226/371472 [10:54:08<18:26:49, 3.53it/s] 37%|███▋ | 137227/371472 [10:54:08<18:16:43, 3.56it/s] 37%|███▋ | 137228/371472 [10:54:08<19:04:30, 3.41it/s] 37%|███▋ | 137229/371472 [10:54:08<18:08:16, 3.59it/s] 37%|███▋ | 137230/371472 [10:54:09<18:22:27, 3.54it/s] 37%|███▋ | 137231/371472 [10:54:09<18:13:13, 3.57it/s] 37%|███▋ | 137232/371472 [10:54:09<19:19:45, 3.37it/s] 37%|███▋ | 137233/371472 [10:54:10<18:07:33, 3.59it/s] 37%|███▋ | 137234/371472 [10:54:10<18:32:01, 3.51it/s] 37%|███▋ | 137235/371472 [10:54:10<18:14:51, 3.57it/s] 37%|███▋ | 137236/371472 [10:54:10<17:51:51, 3.64it/s] 37%|███▋ | 137237/371472 [10:54:11<17:01:39, 3.82it/s] 37%|███▋ | 137238/371472 [10:54:11<18:29:09, 3.52it/s] 37%|███▋ | 137239/371472 [10:54:11<18:19:45, 3.55it/s] 37%|███▋ | 137240/371472 [10:54:12<17:44:02, 3.67it/s] {'loss': 3.2945, 'learning_rate': 6.678015040186171e-07, 'epoch': 5.91} + 37%|███▋ | 137240/371472 [10:54:12<17:44:02, 3.67it/s] 37%|███▋ | 137241/371472 [10:54:12<17:54:17, 3.63it/s] 37%|███▋ | 137242/371472 [10:54:12<17:24:41, 3.74it/s] 37%|███▋ | 137243/371472 [10:54:12<16:57:55, 3.84it/s] 37%|███▋ | 137244/371472 [10:54:13<17:29:43, 3.72it/s] 37%|███▋ | 137245/371472 [10:54:13<17:28:29, 3.72it/s] 37%|███▋ | 137246/371472 [10:54:13<17:18:51, 3.76it/s] 37%|███▋ | 137247/371472 [10:54:13<17:53:04, 3.64it/s] 37%|███▋ | 137248/371472 [10:54:14<17:38:59, 3.69it/s] 37%|███▋ | 137249/371472 [10:54:14<16:50:39, 3.86it/s] 37%|███▋ | 137250/371472 [10:54:14<17:53:08, 3.64it/s] 37%|███▋ | 137251/371472 [10:54:14<17:01:50, 3.82it/s] 37%|███▋ | 137252/371472 [10:54:15<17:03:45, 3.81it/s] 37%|███▋ | 137253/371472 [10:54:15<17:18:01, 3.76it/s] 37%|███▋ | 137254/371472 [10:54:15<16:40:12, 3.90it/s] 37%|███▋ | 137255/371472 [10:54:16<17:39:50, 3.68it/s] 37%|███▋ | 137256/371472 [10:54:16<17:18:41, 3.76it/s] 37%|███▋ | 137257/371472 [10:54:16<17:38:24, 3.69it/s] 37%|███▋ | 137258/371472 [10:54:16<18:23:47, 3.54it/s] 37%|███▋ | 137259/371472 [10:54:17<18:07:22, 3.59it/s] 37%|███▋ | 137260/371472 [10:54:17<17:50:15, 3.65it/s] {'loss': 3.28, 'learning_rate': 6.677530220431382e-07, 'epoch': 5.91} + 37%|███▋ | 137260/371472 [10:54:17<17:50:15, 3.65it/s] 37%|███▋ | 137261/371472 [10:54:17<17:46:59, 3.66it/s] 37%|███▋ | 137262/371472 [10:54:18<18:29:48, 3.52it/s] 37%|███▋ | 137263/371472 [10:54:18<18:00:18, 3.61it/s] 37%|███▋ | 137264/371472 [10:54:18<17:37:29, 3.69it/s] 37%|███▋ | 137265/371472 [10:54:18<18:21:49, 3.54it/s] 37%|███▋ | 137266/371472 [10:54:19<17:53:36, 3.64it/s] 37%|███▋ | 137267/371472 [10:54:19<19:14:39, 3.38it/s] 37%|███▋ | 137268/371472 [10:54:19<18:47:53, 3.46it/s] 37%|███▋ | 137269/371472 [10:54:20<19:14:02, 3.38it/s] 37%|███▋ | 137270/371472 [10:54:20<18:53:04, 3.44it/s] 37%|███▋ | 137271/371472 [10:54:20<19:05:20, 3.41it/s] 37%|███▋ | 137272/371472 [10:54:20<18:59:06, 3.43it/s] 37%|███▋ | 137273/371472 [10:54:21<18:27:04, 3.53it/s] 37%|███▋ | 137274/371472 [10:54:21<17:42:25, 3.67it/s] 37%|███▋ | 137275/371472 [10:54:21<17:37:17, 3.69it/s] 37%|███▋ | 137276/371472 [10:54:21<18:16:20, 3.56it/s] 37%|███▋ | 137277/371472 [10:54:22<17:33:30, 3.70it/s] 37%|███▋ | 137278/371472 [10:54:22<17:06:46, 3.80it/s] 37%|███▋ | 137279/371472 [10:54:22<17:18:02, 3.76it/s] 37%|███▋ | 137280/371472 [10:54:23<17:10:02, 3.79it/s] {'loss': 3.2057, 'learning_rate': 6.677045400676593e-07, 'epoch': 5.91} + 37%|███▋ | 137280/371472 [10:54:23<17:10:02, 3.79it/s] 37%|███▋ | 137281/371472 [10:54:23<16:38:14, 3.91it/s] 37%|███▋ | 137282/371472 [10:54:23<17:01:56, 3.82it/s] 37%|███▋ | 137283/371472 [10:54:23<18:11:31, 3.58it/s] 37%|███▋ | 137284/371472 [10:54:24<17:41:46, 3.68it/s] 37%|███▋ | 137285/371472 [10:54:24<19:00:19, 3.42it/s] 37%|███▋ | 137286/371472 [10:54:24<19:03:21, 3.41it/s] 37%|███▋ | 137287/371472 [10:54:25<20:28:02, 3.18it/s] 37%|███▋ | 137288/371472 [10:54:25<21:34:50, 3.01it/s] 37%|███▋ | 137289/371472 [10:54:25<20:09:03, 3.23it/s] 37%|███▋ | 137290/371472 [10:54:25<19:20:56, 3.36it/s] 37%|███▋ | 137291/371472 [10:54:26<18:59:25, 3.43it/s] 37%|███▋ | 137292/371472 [10:54:26<18:14:08, 3.57it/s] 37%|███▋ | 137293/371472 [10:54:26<19:03:17, 3.41it/s] 37%|███▋ | 137294/371472 [10:54:27<18:19:05, 3.55it/s] 37%|███▋ | 137295/371472 [10:54:27<17:14:24, 3.77it/s] 37%|███▋ | 137296/371472 [10:54:27<19:15:45, 3.38it/s] 37%|███▋ | 137297/371472 [10:54:28<19:49:09, 3.28it/s] 37%|███▋ | 137298/371472 [10:54:28<20:15:59, 3.21it/s] 37%|███▋ | 137299/371472 [10:54:28<19:39:38, 3.31it/s] 37%|███▋ | 137300/371472 [10:54:28<20:12:13, 3.22it/s] {'loss': 3.1788, 'learning_rate': 6.676560580921803e-07, 'epoch': 5.91} + 37%|███▋ | 137300/371472 [10:54:28<20:12:13, 3.22it/s] 37%|███▋ | 137301/371472 [10:54:29<19:36:02, 3.32it/s] 37%|███▋ | 137302/371472 [10:54:29<18:32:20, 3.51it/s] 37%|███▋ | 137303/371472 [10:54:29<18:37:39, 3.49it/s] 37%|███▋ | 137304/371472 [10:54:30<17:54:44, 3.63it/s] 37%|███▋ | 137305/371472 [10:54:30<18:25:45, 3.53it/s] 37%|███▋ | 137306/371472 [10:54:30<18:57:25, 3.43it/s] 37%|███▋ | 137307/371472 [10:54:30<17:49:30, 3.65it/s] 37%|███▋ | 137308/371472 [10:54:31<18:03:15, 3.60it/s] 37%|███▋ | 137309/371472 [10:54:31<18:15:19, 3.56it/s] 37%|███▋ | 137310/371472 [10:54:31<17:45:32, 3.66it/s] 37%|███▋ | 137311/371472 [10:54:31<17:09:57, 3.79it/s] 37%|███▋ | 137312/371472 [10:54:32<17:24:00, 3.74it/s] 37%|███▋ | 137313/371472 [10:54:32<17:10:37, 3.79it/s] 37%|███▋ | 137314/371472 [10:54:32<17:05:28, 3.81it/s] 37%|███▋ | 137315/371472 [10:54:33<17:26:05, 3.73it/s] 37%|███▋ | 137316/371472 [10:54:33<17:32:34, 3.71it/s] 37%|███▋ | 137317/371472 [10:54:33<18:33:47, 3.50it/s] 37%|███▋ | 137318/371472 [10:54:33<18:02:21, 3.61it/s] 37%|███▋ | 137319/371472 [10:54:34<17:35:49, 3.70it/s] 37%|███▋ | 137320/371472 [10:54:34<17:04:09, 3.81it/s] {'loss': 3.2869, 'learning_rate': 6.676075761167014e-07, 'epoch': 5.91} + 37%|███▋ | 137320/371472 [10:54:34<17:04:09, 3.81it/s] 37%|███▋ | 137321/371472 [10:54:34<17:20:07, 3.75it/s] 37%|███▋ | 137322/371472 [10:54:35<19:08:39, 3.40it/s] 37%|███▋ | 137323/371472 [10:54:35<18:56:33, 3.43it/s] 37%|███▋ | 137324/371472 [10:54:35<20:00:25, 3.25it/s] 37%|███▋ | 137325/371472 [10:54:35<19:03:35, 3.41it/s] 37%|███▋ | 137326/371472 [10:54:36<18:00:43, 3.61it/s] 37%|███▋ | 137327/371472 [10:54:36<17:25:23, 3.73it/s] 37%|███▋ | 137328/371472 [10:54:36<19:02:02, 3.42it/s] 37%|███▋ | 137329/371472 [10:54:36<18:40:26, 3.48it/s] 37%|███▋ | 137330/371472 [10:54:37<18:47:02, 3.46it/s] 37%|███▋ | 137331/371472 [10:54:37<20:12:59, 3.22it/s] 37%|███▋ | 137332/371472 [10:54:37<20:00:42, 3.25it/s] 37%|███▋ | 137333/371472 [10:54:38<20:27:21, 3.18it/s] 37%|███▋ | 137334/371472 [10:54:38<20:15:15, 3.21it/s] 37%|███▋ | 137335/371472 [10:54:38<19:48:59, 3.28it/s] 37%|███▋ | 137336/371472 [10:54:39<19:37:39, 3.31it/s] 37%|███▋ | 137337/371472 [10:54:39<18:50:59, 3.45it/s] 37%|███▋ | 137338/371472 [10:54:39<19:32:06, 3.33it/s] 37%|███▋ | 137339/371472 [10:54:40<18:49:29, 3.45it/s] 37%|███▋ | 137340/371472 [10:54:40<21:30:28, 3.02it/s] {'loss': 2.9659, 'learning_rate': 6.675590941412225e-07, 'epoch': 5.92} + 37%|███▋ | 137340/371472 [10:54:40<21:30:28, 3.02it/s] 37%|███▋ | 137341/371472 [10:54:40<19:58:06, 3.26it/s] 37%|███▋ | 137342/371472 [10:54:40<19:43:51, 3.30it/s] 37%|███▋ | 137343/371472 [10:54:41<18:46:12, 3.46it/s] 37%|███▋ | 137344/371472 [10:54:41<19:20:05, 3.36it/s] 37%|███▋ | 137345/371472 [10:54:41<18:34:38, 3.50it/s] 37%|███▋ | 137346/371472 [10:54:42<18:27:46, 3.52it/s] 37%|███▋ | 137347/371472 [10:54:42<18:28:38, 3.52it/s] 37%|███▋ | 137348/371472 [10:54:42<19:06:14, 3.40it/s] 37%|███▋ | 137349/371472 [10:54:42<18:06:03, 3.59it/s] 37%|███▋ | 137350/371472 [10:54:43<17:29:52, 3.72it/s] 37%|███▋ | 137351/371472 [10:54:43<17:28:03, 3.72it/s] 37%|███▋ | 137352/371472 [10:54:43<17:10:15, 3.79it/s] 37%|███▋ | 137353/371472 [10:54:44<17:48:46, 3.65it/s] 37%|███▋ | 137354/371472 [10:54:44<18:23:55, 3.53it/s] 37%|███▋ | 137355/371472 [10:54:44<17:44:59, 3.66it/s] 37%|███▋ | 137356/371472 [10:54:44<16:53:00, 3.85it/s] 37%|███▋ | 137357/371472 [10:54:45<16:34:02, 3.93it/s] 37%|███▋ | 137358/371472 [10:54:45<17:14:40, 3.77it/s] 37%|███▋ | 137359/371472 [10:54:45<16:49:34, 3.86it/s] 37%|███▋ | 137360/371472 [10:54:45<16:24:33, 3.96it/s] {'loss': 3.1462, 'learning_rate': 6.675106121657436e-07, 'epoch': 5.92} + 37%|███▋ | 137360/371472 [10:54:45<16:24:33, 3.96it/s] 37%|███▋ | 137361/371472 [10:54:46<16:42:45, 3.89it/s] 37%|███▋ | 137362/371472 [10:54:46<17:11:23, 3.78it/s] 37%|███▋ | 137363/371472 [10:54:46<17:22:12, 3.74it/s] 37%|███▋ | 137364/371472 [10:54:46<17:16:50, 3.76it/s] 37%|███▋ | 137365/371472 [10:54:47<17:03:53, 3.81it/s] 37%|███▋ | 137366/371472 [10:54:47<16:40:28, 3.90it/s] 37%|███▋ | 137367/371472 [10:54:47<17:05:16, 3.81it/s] 37%|███▋ | 137368/371472 [10:54:47<17:21:06, 3.75it/s] 37%|███▋ | 137369/371472 [10:54:48<17:40:44, 3.68it/s] 37%|███▋ | 137370/371472 [10:54:48<18:34:07, 3.50it/s] 37%|███▋ | 137371/371472 [10:54:48<18:17:25, 3.56it/s] 37%|███▋ | 137372/371472 [10:54:49<17:56:21, 3.62it/s] 37%|███▋ | 137373/371472 [10:54:49<17:44:48, 3.66it/s] 37%|███▋ | 137374/371472 [10:54:49<18:15:36, 3.56it/s] 37%|███▋ | 137375/371472 [10:54:49<17:59:17, 3.61it/s] 37%|███▋ | 137376/371472 [10:54:50<18:41:22, 3.48it/s] 37%|███▋ | 137377/371472 [10:54:50<18:14:14, 3.57it/s] 37%|███▋ | 137378/371472 [10:54:50<19:39:15, 3.31it/s] 37%|███▋ | 137379/371472 [10:54:51<18:57:38, 3.43it/s] 37%|███▋ | 137380/371472 [10:54:51<18:44:10, 3.47it/s] {'loss': 3.0528, 'learning_rate': 6.674621301902648e-07, 'epoch': 5.92} + 37%|███▋ | 137380/371472 [10:54:51<18:44:10, 3.47it/s] 37%|███▋ | 137381/371472 [10:54:51<17:56:31, 3.62it/s] 37%|███▋ | 137382/371472 [10:54:51<17:15:45, 3.77it/s] 37%|███▋ | 137383/371472 [10:54:52<17:58:04, 3.62it/s] 37%|███▋ | 137384/371472 [10:54:52<17:38:37, 3.69it/s] 37%|███▋ | 137385/371472 [10:54:52<18:00:52, 3.61it/s] 37%|███▋ | 137386/371472 [10:54:53<18:11:08, 3.58it/s] 37%|███▋ | 137387/371472 [10:54:53<18:02:54, 3.60it/s] 37%|███▋ | 137388/371472 [10:54:53<17:23:29, 3.74it/s] 37%|███▋ | 137389/371472 [10:54:53<17:38:28, 3.69it/s] 37%|███▋ | 137390/371472 [10:54:54<18:34:14, 3.50it/s] 37%|███▋ | 137391/371472 [10:54:54<18:59:15, 3.42it/s] 37%|███▋ | 137392/371472 [10:54:54<19:04:59, 3.41it/s] 37%|███▋ | 137393/371472 [10:54:54<18:10:47, 3.58it/s] 37%|███▋ | 137394/371472 [10:54:55<18:07:53, 3.59it/s] 37%|███▋ | 137395/371472 [10:54:55<19:57:30, 3.26it/s] 37%|███▋ | 137396/371472 [10:54:55<20:09:08, 3.23it/s] 37%|███▋ | 137397/371472 [10:54:56<19:03:05, 3.41it/s] 37%|███▋ | 137398/371472 [10:54:56<18:09:23, 3.58it/s] 37%|███▋ | 137399/371472 [10:54:56<17:28:39, 3.72it/s] 37%|███▋ | 137400/371472 [10:54:56<17:09:12, 3.79it/s] {'loss': 3.0271, 'learning_rate': 6.674136482147859e-07, 'epoch': 5.92} + 37%|███▋ | 137400/371472 [10:54:56<17:09:12, 3.79it/s] 37%|███▋ | 137401/371472 [10:54:57<18:17:40, 3.55it/s] 37%|███▋ | 137402/371472 [10:54:57<18:20:46, 3.54it/s] 37%|███▋ | 137403/371472 [10:54:57<21:02:25, 3.09it/s] 37%|███▋ | 137404/371472 [10:54:58<19:36:27, 3.32it/s] 37%|███▋ | 137405/371472 [10:54:58<18:36:59, 3.49it/s] 37%|███▋ | 137406/371472 [10:54:58<19:35:26, 3.32it/s] 37%|███▋ | 137407/371472 [10:54:59<18:51:55, 3.45it/s] 37%|███▋ | 137408/371472 [10:54:59<18:22:25, 3.54it/s] 37%|███▋ | 137409/371472 [10:54:59<19:16:39, 3.37it/s] 37%|███▋ | 137410/371472 [10:54:59<19:32:47, 3.33it/s] 37%|███▋ | 137411/371472 [10:55:00<20:03:46, 3.24it/s] 37%|███▋ | 137412/371472 [10:55:00<18:57:49, 3.43it/s] 37%|███▋ | 137413/371472 [10:55:00<18:21:27, 3.54it/s] 37%|███▋ | 137414/371472 [10:55:01<17:40:35, 3.68it/s] 37%|███▋ | 137415/371472 [10:55:01<17:46:51, 3.66it/s] 37%|███▋ | 137416/371472 [10:55:01<18:08:22, 3.58it/s] 37%|███▋ | 137417/371472 [10:55:01<18:09:56, 3.58it/s] 37%|███▋ | 137418/371472 [10:55:02<17:59:26, 3.61it/s] 37%|███▋ | 137419/371472 [10:55:02<18:38:46, 3.49it/s] 37%|███▋ | 137420/371472 [10:55:02<18:06:58, 3.59it/s] {'loss': 2.9471, 'learning_rate': 6.673651662393069e-07, 'epoch': 5.92} + 37%|███▋ | 137420/371472 [10:55:02<18:06:58, 3.59it/s] 37%|███▋ | 137421/371472 [10:55:03<17:35:29, 3.70it/s] 37%|███▋ | 137422/371472 [10:55:03<19:29:55, 3.33it/s] 37%|███▋ | 137423/371472 [10:55:03<21:59:49, 2.96it/s] 37%|███▋ | 137424/371472 [10:55:04<20:04:23, 3.24it/s] 37%|███▋ | 137425/371472 [10:55:04<18:51:25, 3.45it/s] 37%|███▋ | 137426/371472 [10:55:04<19:02:10, 3.42it/s] 37%|███▋ | 137427/371472 [10:55:04<18:31:31, 3.51it/s] 37%|███▋ | 137428/371472 [10:55:05<19:44:02, 3.29it/s] 37%|███▋ | 137429/371472 [10:55:05<19:03:58, 3.41it/s] 37%|███▋ | 137430/371472 [10:55:05<18:18:27, 3.55it/s] 37%|███▋ | 137431/371472 [10:55:05<17:53:02, 3.64it/s] 37%|███▋ | 137432/371472 [10:55:06<18:07:25, 3.59it/s] 37%|███▋ | 137433/371472 [10:55:06<17:47:57, 3.65it/s] 37%|███▋ | 137434/371472 [10:55:06<17:20:05, 3.75it/s] 37%|███▋ | 137435/371472 [10:55:07<18:01:34, 3.61it/s] 37%|███▋ | 137436/371472 [10:55:07<18:22:39, 3.54it/s] 37%|███▋ | 137437/371472 [10:55:07<17:50:20, 3.64it/s] 37%|███▋ | 137438/371472 [10:55:07<19:22:59, 3.35it/s] 37%|███▋ | 137439/371472 [10:55:08<18:31:32, 3.51it/s] 37%|███▋ | 137440/371472 [10:55:08<18:44:09, 3.47it/s] {'loss': 3.2837, 'learning_rate': 6.673166842638281e-07, 'epoch': 5.92} + 37%|███▋ | 137440/371472 [10:55:08<18:44:09, 3.47it/s] 37%|███▋ | 137441/371472 [10:55:08<18:26:15, 3.53it/s] 37%|███▋ | 137442/371472 [10:55:09<20:26:56, 3.18it/s] 37%|███▋ | 137443/371472 [10:55:09<20:33:10, 3.16it/s] 37%|███▋ | 137444/371472 [10:55:09<19:51:31, 3.27it/s] 37%|███▋ | 137445/371472 [10:55:10<19:34:03, 3.32it/s] 37%|███▋ | 137446/371472 [10:55:10<19:03:17, 3.41it/s] 37%|███▋ | 137447/371472 [10:55:10<18:23:47, 3.53it/s] 37%|███▋ | 137448/371472 [10:55:10<17:43:45, 3.67it/s] 37%|███▋ | 137449/371472 [10:55:11<18:01:10, 3.61it/s] 37%|███▋ | 137450/371472 [10:55:11<18:08:04, 3.58it/s] 37%|███▋ | 137451/371472 [10:55:11<18:23:49, 3.53it/s] 37%|███▋ | 137452/371472 [10:55:12<18:44:14, 3.47it/s] 37%|███▋ | 137453/371472 [10:55:12<19:11:39, 3.39it/s] 37%|███▋ | 137454/371472 [10:55:12<18:07:54, 3.59it/s] 37%|███▋ | 137455/371472 [10:55:12<18:48:22, 3.46it/s] 37%|███▋ | 137456/371472 [10:55:13<18:28:17, 3.52it/s] 37%|███▋ | 137457/371472 [10:55:13<17:56:08, 3.62it/s] 37%|███▋ | 137458/371472 [10:55:13<18:31:06, 3.51it/s] 37%|███▋ | 137459/371472 [10:55:13<17:42:14, 3.67it/s] 37%|███▋ | 137460/371472 [10:55:14<17:37:49, 3.69it/s] {'loss': 3.013, 'learning_rate': 6.672682022883492e-07, 'epoch': 5.92} + 37%|███▋ | 137460/371472 [10:55:14<17:37:49, 3.69it/s] 37%|███▋ | 137461/371472 [10:55:14<18:25:38, 3.53it/s] 37%|███▋ | 137462/371472 [10:55:14<17:58:10, 3.62it/s] 37%|███▋ | 137463/371472 [10:55:15<17:21:43, 3.74it/s] 37%|███▋ | 137464/371472 [10:55:15<17:32:41, 3.70it/s] 37%|███▋ | 137465/371472 [10:55:15<17:46:43, 3.66it/s] 37%|███▋ | 137466/371472 [10:55:15<17:00:38, 3.82it/s] 37%|███▋ | 137467/371472 [10:55:16<16:45:14, 3.88it/s] 37%|███▋ | 137468/371472 [10:55:16<17:19:15, 3.75it/s] 37%|███▋ | 137469/371472 [10:55:16<17:38:27, 3.68it/s] 37%|███▋ | 137470/371472 [10:55:16<17:37:21, 3.69it/s] 37%|███▋ | 137471/371472 [10:55:17<17:40:46, 3.68it/s] 37%|███▋ | 137472/371472 [10:55:17<17:40:03, 3.68it/s] 37%|███▋ | 137473/371472 [10:55:17<17:03:34, 3.81it/s] 37%|███▋ | 137474/371472 [10:55:18<17:08:58, 3.79it/s] 37%|███▋ | 137475/371472 [10:55:18<18:13:09, 3.57it/s] 37%|███▋ | 137476/371472 [10:55:18<17:42:30, 3.67it/s] 37%|███▋ | 137477/371472 [10:55:18<18:01:56, 3.60it/s] 37%|███▋ | 137478/371472 [10:55:19<18:11:06, 3.57it/s] 37%|███▋ | 137479/371472 [10:55:19<18:26:12, 3.53it/s] 37%|███▋ | 137480/371472 [10:55:19<18:27:34, 3.52it/s] {'loss': 3.1329, 'learning_rate': 6.672197203128703e-07, 'epoch': 5.92} + 37%|███▋ | 137480/371472 [10:55:19<18:27:34, 3.52it/s] 37%|███▋ | 137481/371472 [10:55:20<18:23:57, 3.53it/s] 37%|███▋ | 137482/371472 [10:55:20<18:31:08, 3.51it/s] 37%|███▋ | 137483/371472 [10:55:20<18:16:21, 3.56it/s] 37%|███▋ | 137484/371472 [10:55:20<19:16:24, 3.37it/s] 37%|███▋ | 137485/371472 [10:55:21<19:05:04, 3.41it/s] 37%|███▋ | 137486/371472 [10:55:21<18:36:12, 3.49it/s] 37%|███▋ | 137487/371472 [10:55:21<17:39:44, 3.68it/s] 37%|███▋ | 137488/371472 [10:55:21<17:42:04, 3.67it/s] 37%|███▋ | 137489/371472 [10:55:22<17:18:39, 3.75it/s] 37%|███▋ | 137490/371472 [10:55:22<17:07:01, 3.80it/s] 37%|███▋ | 137491/371472 [10:55:22<17:06:48, 3.80it/s] 37%|███▋ | 137492/371472 [10:55:23<18:07:03, 3.59it/s] 37%|███▋ | 137493/371472 [10:55:23<17:39:37, 3.68it/s] 37%|███▋ | 137494/371472 [10:55:23<17:35:48, 3.69it/s] 37%|███▋ | 137495/371472 [10:55:23<17:13:25, 3.77it/s] 37%|███▋ | 137496/371472 [10:55:24<19:02:30, 3.41it/s] 37%|███▋ | 137497/371472 [10:55:24<18:32:40, 3.50it/s] 37%|███▋ | 137498/371472 [10:55:24<17:44:00, 3.66it/s] 37%|███▋ | 137499/371472 [10:55:24<17:31:34, 3.71it/s] 37%|███▋ | 137500/371472 [10:55:25<17:22:20, 3.74it/s] {'loss': 3.2816, 'learning_rate': 6.671712383373914e-07, 'epoch': 5.92} + 37%|███▋ | 137500/371472 [10:55:25<17:22:20, 3.74it/s] 37%|███▋ | 137501/371472 [10:55:25<17:33:41, 3.70it/s] 37%|███▋ | 137502/371472 [10:55:25<18:44:29, 3.47it/s] 37%|███▋ | 137503/371472 [10:55:26<18:30:55, 3.51it/s] 37%|███▋ | 137504/371472 [10:55:26<17:49:56, 3.64it/s] 37%|███▋ | 137505/371472 [10:55:26<18:49:27, 3.45it/s] 37%|███▋ | 137506/371472 [10:55:27<19:43:41, 3.29it/s] 37%|███▋ | 137507/371472 [10:55:27<20:12:20, 3.22it/s] 37%|███▋ | 137508/371472 [10:55:27<20:55:21, 3.11it/s] 37%|███▋ | 137509/371472 [10:55:27<20:20:23, 3.20it/s] 37%|███▋ | 137510/371472 [10:55:28<19:32:44, 3.32it/s] 37%|███▋ | 137511/371472 [10:55:28<19:53:01, 3.27it/s] 37%|███▋ | 137512/371472 [10:55:28<21:07:54, 3.08it/s] 37%|███▋ | 137513/371472 [10:55:29<20:25:20, 3.18it/s] 37%|███▋ | 137514/371472 [10:55:29<19:22:27, 3.35it/s] 37%|███▋ | 137515/371472 [10:55:29<19:36:16, 3.31it/s] 37%|███▋ | 137516/371472 [10:55:30<18:46:32, 3.46it/s] 37%|███▋ | 137517/371472 [10:55:30<18:50:30, 3.45it/s] 37%|███▋ | 137518/371472 [10:55:30<19:51:09, 3.27it/s] 37%|███▋ | 137519/371472 [10:55:30<19:35:37, 3.32it/s] 37%|███▋ | 137520/371472 [10:55:31<19:56:01, 3.26it/s] {'loss': 3.0798, 'learning_rate': 6.671227563619126e-07, 'epoch': 5.92} + 37%|███▋ | 137520/371472 [10:55:31<19:56:01, 3.26it/s] 37%|███▋ | 137521/371472 [10:55:31<19:24:38, 3.35it/s] 37%|███▋ | 137522/371472 [10:55:31<18:15:16, 3.56it/s] 37%|███▋ | 137523/371472 [10:55:32<17:28:18, 3.72it/s] 37%|███▋ | 137524/371472 [10:55:32<18:05:46, 3.59it/s] 37%|███▋ | 137525/371472 [10:55:32<18:04:08, 3.60it/s] 37%|███▋ | 137526/371472 [10:55:32<17:40:06, 3.68it/s] 37%|███▋ | 137527/371472 [10:55:33<17:10:42, 3.78it/s] 37%|███▋ | 137528/371472 [10:55:33<17:34:35, 3.70it/s] 37%|███▋ | 137529/371472 [10:55:33<17:13:46, 3.77it/s] 37%|███▋ | 137530/371472 [10:55:34<18:09:22, 3.58it/s] 37%|███▋ | 137531/371472 [10:55:34<18:01:14, 3.61it/s] 37%|███▋ | 137532/371472 [10:55:34<19:02:32, 3.41it/s] 37%|███▋ | 137533/371472 [10:55:34<19:06:28, 3.40it/s] 37%|███▋ | 137534/371472 [10:55:35<18:22:55, 3.54it/s] 37%|███▋ | 137535/371472 [10:55:35<18:16:07, 3.56it/s] 37%|███▋ | 137536/371472 [10:55:35<19:01:10, 3.42it/s] 37%|███▋ | 137537/371472 [10:55:36<18:20:10, 3.54it/s] 37%|███▋ | 137538/371472 [10:55:36<18:17:35, 3.55it/s] 37%|███▋ | 137539/371472 [10:55:36<17:34:23, 3.70it/s] 37%|███▋ | 137540/371472 [10:55:36<17:33:58, 3.70it/s] {'loss': 3.1757, 'learning_rate': 6.670742743864337e-07, 'epoch': 5.92} + 37%|███▋ | 137540/371472 [10:55:36<17:33:58, 3.70it/s] 37%|███▋ | 137541/371472 [10:55:37<17:24:51, 3.73it/s] 37%|███▋ | 137542/371472 [10:55:37<17:23:23, 3.74it/s] 37%|███▋ | 137543/371472 [10:55:37<17:09:03, 3.79it/s] 37%|███▋ | 137544/371472 [10:55:37<18:12:59, 3.57it/s] 37%|███▋ | 137545/371472 [10:55:38<17:21:03, 3.74it/s] 37%|███▋ | 137546/371472 [10:55:38<17:28:17, 3.72it/s] 37%|███▋ | 137547/371472 [10:55:38<17:24:23, 3.73it/s] 37%|███▋ | 137548/371472 [10:55:38<18:01:01, 3.61it/s] 37%|███▋ | 137549/371472 [10:55:39<19:59:58, 3.25it/s] 37%|███▋ | 137550/371472 [10:55:39<21:06:34, 3.08it/s] 37%|███▋ | 137551/371472 [10:55:39<19:41:21, 3.30it/s] 37%|███▋ | 137552/371472 [10:55:40<19:40:57, 3.30it/s] 37%|███▋ | 137553/371472 [10:55:40<19:42:45, 3.30it/s] 37%|███▋ | 137554/371472 [10:55:40<18:58:35, 3.42it/s] 37%|███▋ | 137555/371472 [10:55:41<18:19:18, 3.55it/s] 37%|███▋ | 137556/371472 [10:55:41<17:40:24, 3.68it/s] 37%|███▋ | 137557/371472 [10:55:41<17:47:20, 3.65it/s] 37%|███▋ | 137558/371472 [10:55:41<17:52:38, 3.63it/s] 37%|███▋ | 137559/371472 [10:55:42<18:26:27, 3.52it/s] 37%|███▋ | 137560/371472 [10:55:42<18:52:25, 3.44it/s] {'loss': 3.1536, 'learning_rate': 6.670257924109547e-07, 'epoch': 5.92} + 37%|███▋ | 137560/371472 [10:55:42<18:52:25, 3.44it/s] 37%|███▋ | 137561/371472 [10:55:42<19:38:24, 3.31it/s] 37%|███▋ | 137562/371472 [10:55:43<18:39:40, 3.48it/s] 37%|███▋ | 137563/371472 [10:55:43<19:07:11, 3.40it/s] 37%|███▋ | 137564/371472 [10:55:43<18:15:58, 3.56it/s] 37%|███▋ | 137565/371472 [10:55:43<18:43:12, 3.47it/s] 37%|███▋ | 137566/371472 [10:55:44<18:39:15, 3.48it/s] 37%|███▋ | 137567/371472 [10:55:44<21:19:07, 3.05it/s] 37%|███▋ | 137568/371472 [10:55:44<20:26:04, 3.18it/s] 37%|███▋ | 137569/371472 [10:55:45<24:46:18, 2.62it/s] 37%|███▋ | 137570/371472 [10:55:45<22:12:52, 2.92it/s] 37%|███▋ | 137571/371472 [10:55:46<23:10:55, 2.80it/s] 37%|███▋ | 137572/371472 [10:55:46<22:29:48, 2.89it/s] 37%|███▋ | 137573/371472 [10:55:46<21:10:17, 3.07it/s] 37%|███▋ | 137574/371472 [10:55:47<20:07:02, 3.23it/s] 37%|███▋ | 137575/371472 [10:55:47<19:18:30, 3.36it/s] 37%|███▋ | 137576/371472 [10:55:47<20:13:44, 3.21it/s] 37%|███▋ | 137577/371472 [10:55:47<19:09:22, 3.39it/s] 37%|███▋ | 137578/371472 [10:55:48<18:10:14, 3.58it/s] 37%|███▋ | 137579/371472 [10:55:48<18:10:20, 3.58it/s] 37%|███▋ | 137580/371472 [10:55:48<17:52:34, 3.63it/s] {'loss': 3.1504, 'learning_rate': 6.669773104354758e-07, 'epoch': 5.93} + 37%|███▋ | 137580/371472 [10:55:48<17:52:34, 3.63it/s] 37%|███▋ | 137581/371472 [10:55:48<18:08:27, 3.58it/s] 37%|███▋ | 137582/371472 [10:55:49<19:27:02, 3.34it/s] 37%|███▋ | 137583/371472 [10:55:49<20:06:11, 3.23it/s] 37%|███▋ | 137584/371472 [10:55:49<20:23:23, 3.19it/s] 37%|███▋ | 137585/371472 [10:55:50<19:29:27, 3.33it/s] 37%|███▋ | 137586/371472 [10:55:50<18:54:46, 3.44it/s] 37%|███▋ | 137587/371472 [10:55:50<18:56:20, 3.43it/s] 37%|███▋ | 137588/371472 [10:55:51<19:08:48, 3.39it/s] 37%|███▋ | 137589/371472 [10:55:51<18:46:08, 3.46it/s] 37%|███▋ | 137590/371472 [10:55:51<19:18:21, 3.37it/s] 37%|███▋ | 137591/371472 [10:55:51<18:17:32, 3.55it/s] 37%|███▋ | 137592/371472 [10:55:52<18:03:13, 3.60it/s] 37%|███▋ | 137593/371472 [10:55:52<17:45:39, 3.66it/s] 37%|███▋ | 137594/371472 [10:55:52<17:43:21, 3.67it/s] 37%|███▋ | 137595/371472 [10:55:52<17:07:32, 3.79it/s] 37%|███▋ | 137596/371472 [10:55:53<18:05:23, 3.59it/s] 37%|███▋ | 137597/371472 [10:55:53<17:39:40, 3.68it/s] 37%|███▋ | 137598/371472 [10:55:53<18:00:29, 3.61it/s] 37%|███▋ | 137599/371472 [10:55:54<17:48:38, 3.65it/s] 37%|███▋ | 137600/371472 [10:55:54<18:57:10, 3.43it/s] {'loss': 3.0666, 'learning_rate': 6.66928828459997e-07, 'epoch': 5.93} + 37%|███▋ | 137600/371472 [10:55:54<18:57:10, 3.43it/s] 37%|███▋ | 137601/371472 [10:55:54<18:43:21, 3.47it/s] 37%|███▋ | 137602/371472 [10:55:55<18:52:45, 3.44it/s] 37%|███▋ | 137603/371472 [10:55:55<18:21:57, 3.54it/s] 37%|███▋ | 137604/371472 [10:55:55<18:31:26, 3.51it/s] 37%|███▋ | 137605/371472 [10:55:55<17:53:16, 3.63it/s] 37%|███▋ | 137606/371472 [10:55:56<17:57:43, 3.62it/s] 37%|███▋ | 137607/371472 [10:55:56<19:02:14, 3.41it/s] 37%|███▋ | 137608/371472 [10:55:56<18:07:24, 3.58it/s] 37%|███▋ | 137609/371472 [10:55:56<18:23:34, 3.53it/s] 37%|███▋ | 137610/371472 [10:55:57<18:37:48, 3.49it/s] 37%|███▋ | 137611/371472 [10:55:57<24:45:02, 2.62it/s] 37%|███▋ | 137612/371472 [10:55:58<23:11:51, 2.80it/s] 37%|███▋ | 137613/371472 [10:55:58<21:03:20, 3.09it/s] 37%|███▋ | 137614/371472 [10:55:58<22:02:06, 2.95it/s] 37%|███▋ | 137615/371472 [10:55:59<20:57:33, 3.10it/s] 37%|███▋ | 137616/371472 [10:55:59<21:50:13, 2.97it/s] 37%|███▋ | 137617/371472 [10:55:59<20:46:52, 3.13it/s] 37%|███▋ | 137618/371472 [10:55:59<19:24:38, 3.35it/s] 37%|███▋ | 137619/371472 [10:56:00<19:07:06, 3.40it/s] 37%|███▋ | 137620/371472 [10:56:00<18:58:19, 3.42it/s] {'loss': 3.0226, 'learning_rate': 6.668803464845181e-07, 'epoch': 5.93} + 37%|███▋ | 137620/371472 [10:56:00<18:58:19, 3.42it/s] 37%|███▋ | 137621/371472 [10:56:00<18:59:14, 3.42it/s] 37%|███▋ | 137622/371472 [10:56:01<19:43:10, 3.29it/s] 37%|███▋ | 137623/371472 [10:56:01<21:04:09, 3.08it/s] 37%|███▋ | 137624/371472 [10:56:01<20:43:28, 3.13it/s] 37%|███▋ | 137625/371472 [10:56:02<20:05:46, 3.23it/s] 37%|███▋ | 137626/371472 [10:56:02<20:01:57, 3.24it/s] 37%|███▋ | 137627/371472 [10:56:02<19:36:20, 3.31it/s] 37%|███▋ | 137628/371472 [10:56:03<19:12:48, 3.38it/s] 37%|███▋ | 137629/371472 [10:56:03<18:33:06, 3.50it/s] 37%|███▋ | 137630/371472 [10:56:03<18:10:38, 3.57it/s] 37%|███▋ | 137631/371472 [10:56:03<17:35:35, 3.69it/s] 37%|███▋ | 137632/371472 [10:56:04<17:50:19, 3.64it/s] 37%|███▋ | 137633/371472 [10:56:04<18:36:40, 3.49it/s] 37%|███▋ | 137634/371472 [10:56:04<17:59:49, 3.61it/s] 37%|███▋ | 137635/371472 [10:56:04<18:16:14, 3.56it/s] 37%|███▋ | 137636/371472 [10:56:05<17:36:03, 3.69it/s] 37%|███▋ | 137637/371472 [10:56:05<17:21:12, 3.74it/s] 37%|███▋ | 137638/371472 [10:56:05<16:50:49, 3.86it/s] 37%|███▋ | 137639/371472 [10:56:06<18:34:34, 3.50it/s] 37%|███▋ | 137640/371472 [10:56:06<18:00:52, 3.61it/s] {'loss': 3.0879, 'learning_rate': 6.668318645090391e-07, 'epoch': 5.93} + 37%|███▋ | 137640/371472 [10:56:06<18:00:52, 3.61it/s] 37%|███▋ | 137641/371472 [10:56:06<19:05:57, 3.40it/s] 37%|███▋ | 137642/371472 [10:56:06<19:00:33, 3.42it/s] 37%|███▋ | 137643/371472 [10:56:07<19:05:54, 3.40it/s] 37%|███▋ | 137644/371472 [10:56:07<18:06:25, 3.59it/s] 37%|███▋ | 137645/371472 [10:56:07<17:29:11, 3.71it/s] 37%|███▋ | 137646/371472 [10:56:07<17:41:49, 3.67it/s] 37%|███▋ | 137647/371472 [10:56:08<17:12:23, 3.77it/s] 37%|███▋ | 137648/371472 [10:56:08<17:48:14, 3.65it/s] 37%|███▋ | 137649/371472 [10:56:08<18:21:52, 3.54it/s] 37%|███▋ | 137650/371472 [10:56:09<19:05:10, 3.40it/s] 37%|███▋ | 137651/371472 [10:56:09<18:34:00, 3.50it/s] 37%|███▋ | 137652/371472 [10:56:09<17:52:47, 3.63it/s] 37%|███▋ | 137653/371472 [10:56:09<18:45:38, 3.46it/s] 37%|███▋ | 137654/371472 [10:56:10<18:38:30, 3.48it/s] 37%|███▋ | 137655/371472 [10:56:10<18:31:10, 3.51it/s] 37%|███▋ | 137656/371472 [10:56:10<18:01:20, 3.60it/s] 37%|███▋ | 137657/371472 [10:56:11<18:12:52, 3.57it/s] 37%|███▋ | 137658/371472 [10:56:11<17:53:16, 3.63it/s] 37%|███▋ | 137659/371472 [10:56:11<17:09:23, 3.79it/s] 37%|███▋ | 137660/371472 [10:56:11<18:05:06, 3.59it/s] {'loss': 3.1144, 'learning_rate': 6.667833825335603e-07, 'epoch': 5.93} + 37%|███▋ | 137660/371472 [10:56:11<18:05:06, 3.59it/s] 37%|███▋ | 137661/371472 [10:56:12<17:44:19, 3.66it/s] 37%|███▋ | 137662/371472 [10:56:12<18:19:50, 3.54it/s] 37%|███▋ | 137663/371472 [10:56:12<19:29:17, 3.33it/s] 37%|███▋ | 137664/371472 [10:56:13<18:37:49, 3.49it/s] 37%|███▋ | 137665/371472 [10:56:13<18:48:04, 3.45it/s] 37%|███▋ | 137666/371472 [10:56:13<18:55:23, 3.43it/s] 37%|███▋ | 137667/371472 [10:56:13<18:53:32, 3.44it/s] 37%|███▋ | 137668/371472 [10:56:14<18:40:34, 3.48it/s] 37%|███▋ | 137669/371472 [10:56:14<18:31:02, 3.51it/s] 37%|███▋ | 137670/371472 [10:56:14<18:19:18, 3.54it/s] 37%|███▋ | 137671/371472 [10:56:15<17:40:02, 3.68it/s] 37%|███▋ | 137672/371472 [10:56:15<17:08:03, 3.79it/s] 37%|███▋ | 137673/371472 [10:56:15<17:38:03, 3.68it/s] 37%|███▋ | 137674/371472 [10:56:15<16:49:28, 3.86it/s] 37%|███▋ | 137675/371472 [10:56:16<16:57:50, 3.83it/s] 37%|███▋ | 137676/371472 [10:56:16<17:11:50, 3.78it/s] 37%|███▋ | 137677/371472 [10:56:16<17:59:11, 3.61it/s] 37%|███▋ | 137678/371472 [10:56:16<17:31:05, 3.71it/s] 37%|███▋ | 137679/371472 [10:56:17<17:59:51, 3.61it/s] 37%|███▋ | 137680/371472 [10:56:17<18:02:52, 3.60it/s] {'loss': 3.176, 'learning_rate': 6.667349005580814e-07, 'epoch': 5.93} + 37%|███▋ | 137680/371472 [10:56:17<18:02:52, 3.60it/s] 37%|███▋ | 137681/371472 [10:56:17<17:23:20, 3.73it/s] 37%|███▋ | 137682/371472 [10:56:17<17:10:07, 3.78it/s] 37%|███▋ | 137683/371472 [10:56:18<26:07:34, 2.49it/s] 37%|███▋ | 137684/371472 [10:56:19<24:46:42, 2.62it/s] 37%|███▋ | 137685/371472 [10:56:19<22:46:10, 2.85it/s] 37%|███▋ | 137686/371472 [10:56:19<20:51:27, 3.11it/s] 37%|███▋ | 137687/371472 [10:56:19<20:20:21, 3.19it/s] 37%|███▋ | 137688/371472 [10:56:20<19:39:16, 3.30it/s] 37%|███▋ | 137689/371472 [10:56:20<18:16:41, 3.55it/s] 37%|███▋ | 137690/371472 [10:56:20<19:09:27, 3.39it/s] 37%|███▋ | 137691/371472 [10:56:20<18:25:10, 3.53it/s] 37%|███▋ | 137692/371472 [10:56:21<18:01:45, 3.60it/s] 37%|███▋ | 137693/371472 [10:56:21<17:54:56, 3.62it/s] 37%|███▋ | 137694/371472 [10:56:21<18:28:55, 3.51it/s] 37%|███▋ | 137695/371472 [10:56:22<18:46:56, 3.46it/s] 37%|███▋ | 137696/371472 [10:56:22<19:19:36, 3.36it/s] 37%|███▋ | 137697/371472 [10:56:22<18:58:29, 3.42it/s] 37%|███▋ | 137698/371472 [10:56:22<18:01:39, 3.60it/s] 37%|███▋ | 137699/371472 [10:56:23<17:33:47, 3.70it/s] 37%|███▋ | 137700/371472 [10:56:23<19:22:19, 3.35it/s] {'loss': 3.203, 'learning_rate': 6.666864185826025e-07, 'epoch': 5.93} + 37%|███▋ | 137700/371472 [10:56:23<19:22:19, 3.35it/s] 37%|███▋ | 137701/371472 [10:56:23<20:41:13, 3.14it/s] 37%|███▋ | 137702/371472 [10:56:24<19:42:43, 3.29it/s] 37%|███▋ | 137703/371472 [10:56:24<18:44:30, 3.46it/s] 37%|███▋ | 137704/371472 [10:56:24<17:37:58, 3.68it/s] 37%|███▋ | 137705/371472 [10:56:24<17:32:06, 3.70it/s] 37%|███▋ | 137706/371472 [10:56:25<19:03:50, 3.41it/s] 37%|███▋ | 137707/371472 [10:56:25<18:43:27, 3.47it/s] 37%|███▋ | 137708/371472 [10:56:25<18:22:11, 3.53it/s] 37%|███▋ | 137709/371472 [10:56:26<19:37:53, 3.31it/s] 37%|███▋ | 137710/371472 [10:56:26<18:22:25, 3.53it/s] 37%|███▋ | 137711/371472 [10:56:26<18:14:56, 3.56it/s] 37%|███▋ | 137712/371472 [10:56:26<17:45:38, 3.66it/s] 37%|███▋ | 137713/371472 [10:56:27<18:19:35, 3.54it/s] 37%|███▋ | 137714/371472 [10:56:27<17:43:00, 3.67it/s] 37%|███▋ | 137715/371472 [10:56:27<17:26:37, 3.72it/s] 37%|███▋ | 137716/371472 [10:56:27<16:56:36, 3.83it/s] 37%|███▋ | 137717/371472 [10:56:28<16:56:29, 3.83it/s] 37%|███▋ | 137718/371472 [10:56:28<16:51:54, 3.85it/s] 37%|███▋ | 137719/371472 [10:56:28<16:59:48, 3.82it/s] 37%|███▋ | 137720/371472 [10:56:29<17:07:33, 3.79it/s] {'loss': 3.1169, 'learning_rate': 6.666379366071235e-07, 'epoch': 5.93} + 37%|███▋ | 137720/371472 [10:56:29<17:07:33, 3.79it/s] 37%|███▋ | 137721/371472 [10:56:29<18:31:49, 3.50it/s] 37%|███▋ | 137722/371472 [10:56:29<17:43:13, 3.66it/s] 37%|███▋ | 137723/371472 [10:56:29<18:18:46, 3.55it/s] 37%|███▋ | 137724/371472 [10:56:30<17:53:02, 3.63it/s] 37%|███▋ | 137725/371472 [10:56:30<18:37:55, 3.48it/s] 37%|███▋ | 137726/371472 [10:56:30<17:37:01, 3.69it/s] 37%|███▋ | 137727/371472 [10:56:31<17:27:05, 3.72it/s] 37%|███▋ | 137728/371472 [10:56:31<17:17:33, 3.75it/s] 37%|███▋ | 137729/371472 [10:56:31<17:29:12, 3.71it/s] 37%|███▋ | 137730/371472 [10:56:31<17:16:05, 3.76it/s] 37%|███▋ | 137731/371472 [10:56:32<17:54:46, 3.62it/s] 37%|███▋ | 137732/371472 [10:56:32<17:29:56, 3.71it/s] 37%|███▋ | 137733/371472 [10:56:32<16:57:39, 3.83it/s] 37%|███▋ | 137734/371472 [10:56:32<17:04:57, 3.80it/s] 37%|███▋ | 137735/371472 [10:56:33<17:19:38, 3.75it/s] 37%|███▋ | 137736/371472 [10:56:33<18:42:46, 3.47it/s] 37%|███▋ | 137737/371472 [10:56:33<18:38:22, 3.48it/s] 37%|███▋ | 137738/371472 [10:56:34<18:14:04, 3.56it/s] 37%|███▋ | 137739/371472 [10:56:34<18:01:18, 3.60it/s] 37%|███▋ | 137740/371472 [10:56:34<17:42:56, 3.66it/s] {'loss': 3.2139, 'learning_rate': 6.665894546316447e-07, 'epoch': 5.93} + 37%|███▋ | 137740/371472 [10:56:34<17:42:56, 3.66it/s] 37%|███▋ | 137741/371472 [10:56:34<17:29:22, 3.71it/s] 37%|███▋ | 137742/371472 [10:56:35<17:01:12, 3.81it/s] 37%|███▋ | 137743/371472 [10:56:35<18:18:28, 3.55it/s] 37%|███▋ | 137744/371472 [10:56:35<18:10:10, 3.57it/s] 37%|███▋ | 137745/371472 [10:56:35<17:53:19, 3.63it/s] 37%|███▋ | 137746/371472 [10:56:36<17:53:20, 3.63it/s] 37%|███▋ | 137747/371472 [10:56:36<17:33:29, 3.70it/s] 37%|███▋ | 137748/371472 [10:56:36<18:54:51, 3.43it/s] 37%|███▋ | 137749/371472 [10:56:37<18:18:59, 3.54it/s] 37%|███▋ | 137750/371472 [10:56:37<18:28:05, 3.52it/s] 37%|███▋ | 137751/371472 [10:56:37<19:18:15, 3.36it/s] 37%|███▋ | 137752/371472 [10:56:37<18:53:18, 3.44it/s] 37%|███▋ | 137753/371472 [10:56:38<19:17:20, 3.37it/s] 37%|███▋ | 137754/371472 [10:56:38<21:43:01, 2.99it/s] 37%|███▋ | 137755/371472 [10:56:38<20:59:16, 3.09it/s] 37%|███▋ | 137756/371472 [10:56:39<20:02:31, 3.24it/s] 37%|███▋ | 137757/371472 [10:56:39<18:40:05, 3.48it/s] 37%|███▋ | 137758/371472 [10:56:39<18:50:26, 3.45it/s] 37%|███▋ | 137759/371472 [10:56:40<18:30:55, 3.51it/s] 37%|███▋ | 137760/371472 [10:56:40<18:10:15, 3.57it/s] {'loss': 3.2804, 'learning_rate': 6.665409726561659e-07, 'epoch': 5.93} + 37%|███▋ | 137760/371472 [10:56:40<18:10:15, 3.57it/s] 37%|███▋ | 137761/371472 [10:56:40<18:16:53, 3.55it/s] 37%|███▋ | 137762/371472 [10:56:40<18:08:43, 3.58it/s] 37%|███▋ | 137763/371472 [10:56:41<18:19:32, 3.54it/s] 37%|███▋ | 137764/371472 [10:56:41<17:54:34, 3.62it/s] 37%|███▋ | 137765/371472 [10:56:41<17:09:23, 3.78it/s] 37%|███▋ | 137766/371472 [10:56:41<17:30:10, 3.71it/s] 37%|███▋ | 137767/371472 [10:56:42<17:42:26, 3.67it/s] 37%|███▋ | 137768/371472 [10:56:42<18:11:30, 3.57it/s] 37%|███▋ | 137769/371472 [10:56:42<17:28:48, 3.71it/s] 37%|███▋ | 137770/371472 [10:56:43<17:54:29, 3.63it/s] 37%|███▋ | 137771/371472 [10:56:43<17:39:09, 3.68it/s] 37%|███▋ | 137772/371472 [10:56:43<18:18:31, 3.55it/s] 37%|███▋ | 137773/371472 [10:56:43<17:52:42, 3.63it/s] 37%|███▋ | 137774/371472 [10:56:44<18:01:43, 3.60it/s] 37%|███▋ | 137775/371472 [10:56:44<17:48:39, 3.64it/s] 37%|███▋ | 137776/371472 [10:56:44<17:17:14, 3.76it/s] 37%|███▋ | 137777/371472 [10:56:45<17:59:19, 3.61it/s] 37%|███▋ | 137778/371472 [10:56:45<17:54:17, 3.63it/s] 37%|███▋ | 137779/371472 [10:56:45<17:26:43, 3.72it/s] 37%|███▋ | 137780/371472 [10:56:45<17:59:09, 3.61it/s] {'loss': 3.1852, 'learning_rate': 6.66492490680687e-07, 'epoch': 5.93} + 37%|███▋ | 137780/371472 [10:56:45<17:59:09, 3.61it/s] 37%|███▋ | 137781/371472 [10:56:46<18:21:46, 3.54it/s] 37%|███▋ | 137782/371472 [10:56:46<17:40:59, 3.67it/s] 37%|███▋ | 137783/371472 [10:56:46<17:53:55, 3.63it/s] 37%|███▋ | 137784/371472 [10:56:46<17:12:31, 3.77it/s] 37%|███▋ | 137785/371472 [10:56:47<18:18:57, 3.54it/s] 37%|███▋ | 137786/371472 [10:56:47<17:59:31, 3.61it/s] 37%|███▋ | 137787/371472 [10:56:47<17:44:53, 3.66it/s] 37%|███▋ | 137788/371472 [10:56:48<17:46:39, 3.65it/s] 37%|███▋ | 137789/371472 [10:56:48<17:49:31, 3.64it/s] 37%|███▋ | 137790/371472 [10:56:48<19:09:29, 3.39it/s] 37%|███▋ | 137791/371472 [10:56:48<19:01:44, 3.41it/s] 37%|███▋ | 137792/371472 [10:56:49<19:19:35, 3.36it/s] 37%|███▋ | 137793/371472 [10:56:49<18:57:52, 3.42it/s] 37%|███▋ | 137794/371472 [10:56:49<17:51:30, 3.63it/s] 37%|███▋ | 137795/371472 [10:56:50<19:02:36, 3.41it/s] 37%|███▋ | 137796/371472 [10:56:50<19:27:15, 3.34it/s] 37%|███▋ | 137797/371472 [10:56:50<19:36:14, 3.31it/s] 37%|███▋ | 137798/371472 [10:56:51<19:27:49, 3.33it/s] 37%|███▋ | 137799/371472 [10:56:51<19:22:15, 3.35it/s] 37%|███▋ | 137800/371472 [10:56:51<18:47:52, 3.45it/s] {'loss': 3.035, 'learning_rate': 6.664440087052079e-07, 'epoch': 5.94} + 37%|███▋ | 137800/371472 [10:56:51<18:47:52, 3.45it/s] 37%|███▋ | 137801/371472 [10:56:51<17:52:45, 3.63it/s] 37%|███▋ | 137802/371472 [10:56:52<17:49:57, 3.64it/s] 37%|███▋ | 137803/371472 [10:56:52<17:17:30, 3.75it/s] 37%|███▋ | 137804/371472 [10:56:52<17:21:28, 3.74it/s] 37%|███▋ | 137805/371472 [10:56:52<17:49:12, 3.64it/s] 37%|███▋ | 137806/371472 [10:56:53<17:45:18, 3.66it/s] 37%|███▋ | 137807/371472 [10:56:53<17:09:25, 3.78it/s] 37%|███▋ | 137808/371472 [10:56:53<17:38:33, 3.68it/s] 37%|███▋ | 137809/371472 [10:56:53<17:54:21, 3.62it/s] 37%|███▋ | 137810/371472 [10:56:54<17:46:22, 3.65it/s] 37%|███▋ | 137811/371472 [10:56:54<18:29:45, 3.51it/s] 37%|███▋ | 137812/371472 [10:56:54<19:27:29, 3.34it/s] 37%|███▋ | 137813/371472 [10:56:55<18:40:22, 3.48it/s] 37%|███▋ | 137814/371472 [10:56:55<19:29:12, 3.33it/s] 37%|███▋ | 137815/371472 [10:56:55<18:55:29, 3.43it/s] 37%|███▋ | 137816/371472 [10:56:56<18:40:28, 3.48it/s] 37%|███▋ | 137817/371472 [10:56:56<17:50:55, 3.64it/s] 37%|███▋ | 137818/371472 [10:56:56<17:55:37, 3.62it/s] 37%|███▋ | 137819/371472 [10:56:56<17:41:27, 3.67it/s] 37%|███▋ | 137820/371472 [10:56:57<17:13:10, 3.77it/s] {'loss': 3.0937, 'learning_rate': 6.663955267297291e-07, 'epoch': 5.94} + 37%|███▋ | 137820/371472 [10:56:57<17:13:10, 3.77it/s] 37%|███▋ | 137821/371472 [10:56:57<16:52:18, 3.85it/s] 37%|███▋ | 137822/371472 [10:56:57<18:25:50, 3.52it/s] 37%|███▋ | 137823/371472 [10:56:57<18:45:01, 3.46it/s] 37%|███▋ | 137824/371472 [10:56:58<18:20:39, 3.54it/s] 37%|███▋ | 137825/371472 [10:56:58<18:36:51, 3.49it/s] 37%|███▋ | 137826/371472 [10:56:58<20:00:29, 3.24it/s] 37%|███▋ | 137827/371472 [10:56:59<19:47:29, 3.28it/s] 37%|███▋ | 137828/371472 [10:56:59<18:32:25, 3.50it/s] 37%|███▋ | 137829/371472 [10:56:59<18:45:15, 3.46it/s] 37%|███▋ | 137830/371472 [10:56:59<18:24:27, 3.53it/s] 37%|��██▋ | 137831/371472 [10:57:00<17:59:03, 3.61it/s] 37%|███▋ | 137832/371472 [10:57:00<17:49:36, 3.64it/s] 37%|███▋ | 137833/371472 [10:57:00<17:18:07, 3.75it/s] 37%|███▋ | 137834/371472 [10:57:01<16:43:37, 3.88it/s] 37%|███▋ | 137835/371472 [10:57:01<16:21:37, 3.97it/s] 37%|███▋ | 137836/371472 [10:57:01<22:30:53, 2.88it/s] 37%|███▋ | 137837/371472 [10:57:02<21:00:50, 3.09it/s] 37%|███▋ | 137838/371472 [10:57:02<20:10:26, 3.22it/s] 37%|███▋ | 137839/371472 [10:57:02<19:17:35, 3.36it/s] 37%|███▋ | 137840/371472 [10:57:02<18:40:59, 3.47it/s] {'loss': 3.0263, 'learning_rate': 6.663470447542502e-07, 'epoch': 5.94} + 37%|███▋ | 137840/371472 [10:57:02<18:40:59, 3.47it/s] 37%|███▋ | 137841/371472 [10:57:03<17:44:57, 3.66it/s] 37%|███▋ | 137842/371472 [10:57:03<17:11:20, 3.78it/s] 37%|███▋ | 137843/371472 [10:57:03<16:46:30, 3.87it/s] 37%|███▋ | 137844/371472 [10:57:03<16:59:13, 3.82it/s] 37%|███▋ | 137845/371472 [10:57:04<17:24:15, 3.73it/s] 37%|███▋ | 137846/371472 [10:57:04<17:37:31, 3.68it/s] 37%|███▋ | 137847/371472 [10:57:04<17:00:06, 3.82it/s] 37%|███▋ | 137848/371472 [10:57:04<17:34:48, 3.69it/s] 37%|███▋ | 137849/371472 [10:57:05<16:57:33, 3.83it/s] 37%|███▋ | 137850/371472 [10:57:05<17:28:41, 3.71it/s] 37%|███▋ | 137851/371472 [10:57:05<17:35:56, 3.69it/s] 37%|███▋ | 137852/371472 [10:57:06<17:19:43, 3.74it/s] 37%|███▋ | 137853/371472 [10:57:06<16:47:44, 3.86it/s] 37%|███▋ | 137854/371472 [10:57:06<16:33:59, 3.92it/s] 37%|███▋ | 137855/371472 [10:57:06<16:28:40, 3.94it/s] 37%|███▋ | 137856/371472 [10:57:07<16:54:17, 3.84it/s] 37%|███▋ | 137857/371472 [10:57:07<17:16:12, 3.76it/s] 37%|███▋ | 137858/371472 [10:57:07<17:19:11, 3.75it/s] 37%|███▋ | 137859/371472 [10:57:07<17:33:26, 3.70it/s] 37%|███▋ | 137860/371472 [10:57:08<17:00:38, 3.81it/s] {'loss': 3.3591, 'learning_rate': 6.662985627787713e-07, 'epoch': 5.94} + 37%|███▋ | 137860/371472 [10:57:08<17:00:38, 3.81it/s] 37%|███▋ | 137861/371472 [10:57:08<17:04:31, 3.80it/s] 37%|███▋ | 137862/371472 [10:57:08<17:44:26, 3.66it/s] 37%|███▋ | 137863/371472 [10:57:08<17:42:28, 3.66it/s] 37%|███▋ | 137864/371472 [10:57:09<18:31:42, 3.50it/s] 37%|███▋ | 137865/371472 [10:57:09<17:47:21, 3.65it/s] 37%|███▋ | 137866/371472 [10:57:09<17:28:23, 3.71it/s] 37%|███▋ | 137867/371472 [10:57:10<17:00:35, 3.81it/s] 37%|███▋ | 137868/371472 [10:57:10<16:56:14, 3.83it/s] 37%|███▋ | 137869/371472 [10:57:10<16:59:54, 3.82it/s] 37%|███▋ | 137870/371472 [10:57:10<17:18:40, 3.75it/s] 37%|███▋ | 137871/371472 [10:57:11<16:59:13, 3.82it/s] 37%|███▋ | 137872/371472 [10:57:11<17:10:20, 3.78it/s] 37%|███▋ | 137873/371472 [10:57:11<17:29:02, 3.71it/s] 37%|███▋ | 137874/371472 [10:57:11<17:20:30, 3.74it/s] 37%|███▋ | 137875/371472 [10:57:12<18:14:28, 3.56it/s] 37%|███▋ | 137876/371472 [10:57:12<17:58:31, 3.61it/s] 37%|███▋ | 137877/371472 [10:57:12<18:58:53, 3.42it/s] 37%|███▋ | 137878/371472 [10:57:13<19:23:18, 3.35it/s] 37%|███▋ | 137879/371472 [10:57:13<18:32:51, 3.50it/s] 37%|███▋ | 137880/371472 [10:57:13<18:22:17, 3.53it/s] {'loss': 3.0939, 'learning_rate': 6.662500808032924e-07, 'epoch': 5.94} + 37%|███▋ | 137880/371472 [10:57:13<18:22:17, 3.53it/s] 37%|███▋ | 137881/371472 [10:57:13<18:24:08, 3.53it/s] 37%|███▋ | 137882/371472 [10:57:14<17:44:29, 3.66it/s] 37%|███▋ | 137883/371472 [10:57:14<17:57:28, 3.61it/s] 37%|███▋ | 137884/371472 [10:57:14<17:56:05, 3.62it/s] 37%|███▋ | 137885/371472 [10:57:15<20:55:28, 3.10it/s] 37%|███▋ | 137886/371472 [10:57:15<21:03:17, 3.08it/s] 37%|███▋ | 137887/371472 [10:57:16<24:42:12, 2.63it/s] 37%|███▋ | 137888/371472 [10:57:16<22:06:09, 2.94it/s] 37%|███▋ | 137889/371472 [10:57:16<20:28:23, 3.17it/s] 37%|███▋ | 137890/371472 [10:57:16<19:44:43, 3.29it/s] 37%|███▋ | 137891/371472 [10:57:17<18:21:04, 3.54it/s] 37%|███▋ | 137892/371472 [10:57:17<18:45:00, 3.46it/s] 37%|███▋ | 137893/371472 [10:57:17<18:04:11, 3.59it/s] 37%|███▋ | 137894/371472 [10:57:17<18:32:58, 3.50it/s] 37%|███▋ | 137895/371472 [10:57:18<18:17:46, 3.55it/s] 37%|███▋ | 137896/371472 [10:57:18<18:47:56, 3.45it/s] 37%|███▋ | 137897/371472 [10:57:18<19:25:04, 3.34it/s] 37%|███▋ | 137898/371472 [10:57:19<19:55:26, 3.26it/s] 37%|███▋ | 137899/371472 [10:57:19<19:05:40, 3.40it/s] 37%|███▋ | 137900/371472 [10:57:19<18:08:31, 3.58it/s] {'loss': 3.0609, 'learning_rate': 6.662015988278136e-07, 'epoch': 5.94} + 37%|███▋ | 137900/371472 [10:57:19<18:08:31, 3.58it/s] 37%|███▋ | 137901/371472 [10:57:19<17:52:19, 3.63it/s] 37%|███▋ | 137902/371472 [10:57:20<17:49:47, 3.64it/s] 37%|███▋ | 137903/371472 [10:57:20<18:07:02, 3.58it/s] 37%|███▋ | 137904/371472 [10:57:20<18:12:41, 3.56it/s] 37%|███▋ | 137905/371472 [10:57:21<18:09:53, 3.57it/s] 37%|███▋ | 137906/371472 [10:57:21<18:52:34, 3.44it/s] 37%|███▋ | 137907/371472 [10:57:21<18:37:27, 3.48it/s] 37%|███▋ | 137908/371472 [10:57:21<17:52:41, 3.63it/s] 37%|███▋ | 137909/371472 [10:57:22<20:26:41, 3.17it/s] 37%|███▋ | 137910/371472 [10:57:22<20:57:45, 3.09it/s] 37%|███▋ | 137911/371472 [10:57:22<20:56:09, 3.10it/s] 37%|███▋ | 137912/371472 [10:57:23<19:19:49, 3.36it/s] 37%|███▋ | 137913/371472 [10:57:23<18:31:08, 3.50it/s] 37%|███▋ | 137914/371472 [10:57:23<18:08:05, 3.58it/s] 37%|███▋ | 137915/371472 [10:57:23<17:44:13, 3.66it/s] 37%|███▋ | 137916/371472 [10:57:24<17:03:46, 3.80it/s] 37%|███▋ | 137917/371472 [10:57:24<16:35:20, 3.91it/s] 37%|███▋ | 137918/371472 [10:57:24<17:51:31, 3.63it/s] 37%|███▋ | 137919/371472 [10:57:25<17:51:52, 3.63it/s] 37%|███▋ | 137920/371472 [10:57:25<17:29:04, 3.71it/s] {'loss': 3.2201, 'learning_rate': 6.661531168523347e-07, 'epoch': 5.94} + 37%|███▋ | 137920/371472 [10:57:25<17:29:04, 3.71it/s] 37%|███▋ | 137921/371472 [10:57:25<17:39:24, 3.67it/s] 37%|███▋ | 137922/371472 [10:57:25<17:04:29, 3.80it/s] 37%|███▋ | 137923/371472 [10:57:26<17:47:54, 3.64it/s] 37%|███▋ | 137924/371472 [10:57:26<18:10:09, 3.57it/s] 37%|███▋ | 137925/371472 [10:57:26<17:11:39, 3.77it/s] 37%|███▋ | 137926/371472 [10:57:26<18:15:32, 3.55it/s] 37%|███▋ | 137927/371472 [10:57:27<17:36:45, 3.68it/s] 37%|███▋ | 137928/371472 [10:57:27<17:05:43, 3.79it/s] 37%|███▋ | 137929/371472 [10:57:27<17:01:37, 3.81it/s] 37%|███▋ | 137930/371472 [10:57:27<16:45:29, 3.87it/s] 37%|███▋ | 137931/371472 [10:57:28<16:35:15, 3.91it/s] 37%|███▋ | 137932/371472 [10:57:28<17:46:27, 3.65it/s] 37%|███▋ | 137933/371472 [10:57:28<17:32:56, 3.70it/s] 37%|███▋ | 137934/371472 [10:57:29<17:22:14, 3.73it/s] 37%|███▋ | 137935/371472 [10:57:29<16:44:07, 3.88it/s] 37%|███▋ | 137936/371472 [10:57:29<17:25:16, 3.72it/s] 37%|███▋ | 137937/371472 [10:57:29<18:03:02, 3.59it/s] 37%|███▋ | 137938/371472 [10:57:30<17:38:48, 3.68it/s] 37%|███▋ | 137939/371472 [10:57:30<17:23:00, 3.73it/s] 37%|███▋ | 137940/371472 [10:57:30<18:02:48, 3.59it/s] {'loss': 3.314, 'learning_rate': 6.661046348768556e-07, 'epoch': 5.94} + 37%|███▋ | 137940/371472 [10:57:30<18:02:48, 3.59it/s] 37%|███▋ | 137941/371472 [10:57:30<17:43:57, 3.66it/s] 37%|███▋ | 137942/371472 [10:57:31<18:09:25, 3.57it/s] 37%|███▋ | 137943/371472 [10:57:31<17:36:47, 3.68it/s] 37%|███▋ | 137944/371472 [10:57:31<17:05:23, 3.80it/s] 37%|███▋ | 137945/371472 [10:57:32<17:11:33, 3.77it/s] 37%|███▋ | 137946/371472 [10:57:32<17:28:30, 3.71it/s] 37%|███▋ | 137947/371472 [10:57:32<17:16:42, 3.75it/s] 37%|███▋ | 137948/371472 [10:57:32<16:52:21, 3.84it/s] 37%|███▋ | 137949/371472 [10:57:33<16:30:52, 3.93it/s] 37%|███▋ | 137950/371472 [10:57:33<17:24:50, 3.73it/s] 37%|███▋ | 137951/371472 [10:57:33<18:09:41, 3.57it/s] 37%|███▋ | 137952/371472 [10:57:33<17:58:54, 3.61it/s] 37%|███▋ | 137953/371472 [10:57:34<18:11:05, 3.57it/s] 37%|███▋ | 137954/371472 [10:57:34<18:07:54, 3.58it/s] 37%|███▋ | 137955/371472 [10:57:34<17:59:40, 3.60it/s] 37%|███▋ | 137956/371472 [10:57:35<18:41:29, 3.47it/s] 37%|███▋ | 137957/371472 [10:57:35<17:35:23, 3.69it/s] 37%|███▋ | 137958/371472 [10:57:35<17:54:16, 3.62it/s] 37%|███▋ | 137959/371472 [10:57:35<18:23:05, 3.53it/s] 37%|███▋ | 137960/371472 [10:57:36<17:24:21, 3.73it/s] {'loss': 3.0459, 'learning_rate': 6.660561529013768e-07, 'epoch': 5.94} + 37%|███▋ | 137960/371472 [10:57:36<17:24:21, 3.73it/s] 37%|███▋ | 137961/371472 [10:57:36<18:14:31, 3.56it/s] 37%|███▋ | 137962/371472 [10:57:36<18:12:24, 3.56it/s] 37%|███▋ | 137963/371472 [10:57:36<17:26:28, 3.72it/s] 37%|███▋ | 137964/371472 [10:57:37<17:31:14, 3.70it/s] 37%|███▋ | 137965/371472 [10:57:37<17:26:25, 3.72it/s] 37%|███▋ | 137966/371472 [10:57:37<17:23:15, 3.73it/s] 37%|███▋ | 137967/371472 [10:57:38<17:41:46, 3.67it/s] 37%|███▋ | 137968/371472 [10:57:38<17:43:30, 3.66it/s] 37%|███▋ | 137969/371472 [10:57:38<18:56:32, 3.42it/s] 37%|███▋ | 137970/371472 [10:57:38<18:39:38, 3.48it/s] 37%|███▋ | 137971/371472 [10:57:39<17:53:43, 3.62it/s] 37%|███▋ | 137972/371472 [10:57:39<17:59:13, 3.61it/s] 37%|███▋ | 137973/371472 [10:57:39<17:41:33, 3.67it/s] 37%|███▋ | 137974/371472 [10:57:40<18:24:47, 3.52it/s] 37%|███▋ | 137975/371472 [10:57:40<18:08:07, 3.58it/s] 37%|███▋ | 137976/371472 [10:57:40<17:58:37, 3.61it/s] 37%|███▋ | 137977/371472 [10:57:40<19:58:02, 3.25it/s] 37%|███▋ | 137978/371472 [10:57:41<19:25:49, 3.34it/s] 37%|███▋ | 137979/371472 [10:57:41<18:51:37, 3.44it/s] 37%|███▋ | 137980/371472 [10:57:41<17:55:03, 3.62it/s] {'loss': 3.191, 'learning_rate': 6.66007670925898e-07, 'epoch': 5.94} + 37%|███▋ | 137980/371472 [10:57:41<17:55:03, 3.62it/s] 37%|███▋ | 137981/371472 [10:57:41<17:20:19, 3.74it/s] 37%|███▋ | 137982/371472 [10:57:42<17:27:53, 3.71it/s] 37%|███▋ | 137983/371472 [10:57:42<17:43:23, 3.66it/s] 37%|███▋ | 137984/371472 [10:57:42<18:04:11, 3.59it/s] 37%|███▋ | 137985/371472 [10:57:43<17:10:03, 3.78it/s] 37%|███▋ | 137986/371472 [10:57:43<17:58:16, 3.61it/s] 37%|███▋ | 137987/371472 [10:57:43<17:25:02, 3.72it/s] 37%|███▋ | 137988/371472 [10:57:43<17:43:26, 3.66it/s] 37%|███▋ | 137989/371472 [10:57:44<17:41:35, 3.67it/s] 37%|███▋ | 137990/371472 [10:57:44<19:46:29, 3.28it/s] 37%|███▋ | 137991/371472 [10:57:44<19:04:56, 3.40it/s] 37%|███▋ | 137992/371472 [10:57:45<18:46:31, 3.45it/s] 37%|███▋ | 137993/371472 [10:57:45<19:09:08, 3.39it/s] 37%|███▋ | 137994/371472 [10:57:45<18:44:35, 3.46it/s] 37%|███▋ | 137995/371472 [10:57:45<18:50:45, 3.44it/s] 37%|███▋ | 137996/371472 [10:57:46<19:28:27, 3.33it/s] 37%|███▋ | 137997/371472 [10:57:46<18:44:35, 3.46it/s] 37%|███▋ | 137998/371472 [10:57:46<18:52:16, 3.44it/s] 37%|███▋ | 137999/371472 [10:57:47<19:06:54, 3.39it/s] 37%|███▋ | 138000/371472 [10:57:47<18:52:02, 3.44it/s] {'loss': 3.4345, 'learning_rate': 6.659591889504191e-07, 'epoch': 5.94} + 37%|███▋ | 138000/371472 [10:57:47<18:52:02, 3.44it/s] 37%|███▋ | 138001/371472 [10:57:47<19:13:07, 3.37it/s] 37%|███▋ | 138002/371472 [10:57:48<19:18:50, 3.36it/s] 37%|███▋ | 138003/371472 [10:57:48<18:10:19, 3.57it/s] 37%|███▋ | 138004/371472 [10:57:48<18:18:19, 3.54it/s] 37%|███▋ | 138005/371472 [10:57:48<19:26:33, 3.34it/s] 37%|███▋ | 138006/371472 [10:57:49<21:00:14, 3.09it/s] 37%|███▋ | 138007/371472 [10:57:49<20:45:47, 3.12it/s] 37%|███▋ | 138008/371472 [10:57:49<20:24:51, 3.18it/s] 37%|███▋ | 138009/371472 [10:57:50<19:49:07, 3.27it/s] 37%|███▋ | 138010/371472 [10:57:50<21:01:05, 3.09it/s] 37%|███▋ | 138011/371472 [10:57:50<20:32:00, 3.16it/s] 37%|███▋ | 138012/371472 [10:57:51<20:56:26, 3.10it/s] 37%|███▋ | 138013/371472 [10:57:51<21:32:00, 3.01it/s] 37%|███▋ | 138014/371472 [10:57:51<20:06:32, 3.22it/s] 37%|███▋ | 138015/371472 [10:57:52<21:11:30, 3.06it/s] 37%|███▋ | 138016/371472 [10:57:52<20:12:09, 3.21it/s] 37%|███▋ | 138017/371472 [10:57:52<19:47:01, 3.28it/s] 37%|███▋ | 138018/371472 [10:57:53<18:37:53, 3.48it/s] 37%|███▋ | 138019/371472 [10:57:53<19:55:52, 3.25it/s] 37%|███▋ | 138020/371472 [10:57:53<19:16:00, 3.37it/s] {'loss': 2.9941, 'learning_rate': 6.659107069749401e-07, 'epoch': 5.94} + 37%|███▋ | 138020/371472 [10:57:53<19:16:00, 3.37it/s] 37%|█��█▋ | 138021/371472 [10:57:53<18:57:08, 3.42it/s] 37%|███▋ | 138022/371472 [10:57:54<18:35:37, 3.49it/s] 37%|███▋ | 138023/371472 [10:57:54<18:04:20, 3.59it/s] 37%|███▋ | 138024/371472 [10:57:54<17:19:53, 3.74it/s] 37%|███▋ | 138025/371472 [10:57:54<17:09:29, 3.78it/s] 37%|███▋ | 138026/371472 [10:57:55<19:09:41, 3.38it/s] 37%|███▋ | 138027/371472 [10:57:55<18:15:18, 3.55it/s] 37%|███▋ | 138028/371472 [10:57:55<17:52:01, 3.63it/s] 37%|███▋ | 138029/371472 [10:57:56<18:24:55, 3.52it/s] 37%|███▋ | 138030/371472 [10:57:56<17:44:27, 3.66it/s] 37%|███▋ | 138031/371472 [10:57:56<17:20:16, 3.74it/s] 37%|███▋ | 138032/371472 [10:57:56<17:18:32, 3.75it/s] 37%|███▋ | 138033/371472 [10:57:57<16:54:29, 3.84it/s] 37%|███▋ | 138034/371472 [10:57:57<17:34:57, 3.69it/s] 37%|███▋ | 138035/371472 [10:57:57<17:22:52, 3.73it/s] 37%|███▋ | 138036/371472 [10:57:58<18:16:01, 3.55it/s] 37%|███▋ | 138037/371472 [10:57:58<17:53:12, 3.63it/s] 37%|███▋ | 138038/371472 [10:57:58<18:44:01, 3.46it/s] 37%|███▋ | 138039/371472 [10:57:58<18:21:30, 3.53it/s] 37%|███▋ | 138040/371472 [10:57:59<20:05:36, 3.23it/s] {'loss': 3.1053, 'learning_rate': 6.658622249994613e-07, 'epoch': 5.95} + 37%|███▋ | 138040/371472 [10:57:59<20:05:36, 3.23it/s] 37%|███▋ | 138041/371472 [10:57:59<19:07:09, 3.39it/s] 37%|███▋ | 138042/371472 [10:57:59<20:11:18, 3.21it/s] 37%|███▋ | 138043/371472 [10:58:00<19:45:44, 3.28it/s] 37%|███▋ | 138044/371472 [10:58:00<18:49:39, 3.44it/s] 37%|███▋ | 138045/371472 [10:58:00<19:54:46, 3.26it/s] 37%|███▋ | 138046/371472 [10:58:01<19:05:41, 3.40it/s] 37%|███▋ | 138047/371472 [10:58:01<19:33:47, 3.31it/s] 37%|███▋ | 138048/371472 [10:58:01<19:02:27, 3.41it/s] 37%|███▋ | 138049/371472 [10:58:01<18:17:35, 3.54it/s] 37%|███▋ | 138050/371472 [10:58:02<18:20:12, 3.54it/s] 37%|███▋ | 138051/371472 [10:58:02<17:54:12, 3.62it/s] 37%|███▋ | 138052/371472 [10:58:02<18:12:52, 3.56it/s] 37%|███▋ | 138053/371472 [10:58:02<17:49:52, 3.64it/s] 37%|███▋ | 138054/371472 [10:58:03<17:26:54, 3.72it/s] 37%|███▋ | 138055/371472 [10:58:03<18:30:52, 3.50it/s] 37%|███▋ | 138056/371472 [10:58:03<20:03:00, 3.23it/s] 37%|███▋ | 138057/371472 [10:58:04<18:47:02, 3.45it/s] 37%|███▋ | 138058/371472 [10:58:04<19:10:23, 3.38it/s] 37%|███▋ | 138059/371472 [10:58:04<18:53:03, 3.43it/s] 37%|███▋ | 138060/371472 [10:58:05<19:07:14, 3.39it/s] {'loss': 3.0454, 'learning_rate': 6.658137430239824e-07, 'epoch': 5.95} + 37%|███▋ | 138060/371472 [10:58:05<19:07:14, 3.39it/s] 37%|███▋ | 138061/371472 [10:58:05<19:05:00, 3.40it/s] 37%|███▋ | 138062/371472 [10:58:05<20:08:17, 3.22it/s] 37%|███▋ | 138063/371472 [10:58:05<19:30:55, 3.32it/s] 37%|███▋ | 138064/371472 [10:58:06<18:19:19, 3.54it/s] 37%|███▋ | 138065/371472 [10:58:06<18:33:44, 3.49it/s] 37%|███▋ | 138066/371472 [10:58:06<19:51:35, 3.26it/s] 37%|███▋ | 138067/371472 [10:58:07<18:34:02, 3.49it/s] 37%|███▋ | 138068/371472 [10:58:07<19:21:40, 3.35it/s] 37%|███▋ | 138069/371472 [10:58:07<19:34:14, 3.31it/s] 37%|███▋ | 138070/371472 [10:58:08<22:28:58, 2.88it/s] 37%|███▋ | 138071/371472 [10:58:08<20:45:46, 3.12it/s] 37%|███▋ | 138072/371472 [10:58:08<19:36:09, 3.31it/s] 37%|███▋ | 138073/371472 [10:58:08<19:12:54, 3.37it/s] 37%|███▋ | 138074/371472 [10:58:09<18:25:20, 3.52it/s] 37%|███▋ | 138075/371472 [10:58:09<17:48:10, 3.64it/s] 37%|███▋ | 138076/371472 [10:58:09<17:25:26, 3.72it/s] 37%|███▋ | 138077/371472 [10:58:10<17:58:02, 3.61it/s] 37%|███▋ | 138078/371472 [10:58:10<17:53:11, 3.62it/s] 37%|███▋ | 138079/371472 [10:58:10<19:22:39, 3.35it/s] 37%|███▋ | 138080/371472 [10:58:10<18:25:07, 3.52it/s] {'loss': 3.05, 'learning_rate': 6.657652610485035e-07, 'epoch': 5.95} + 37%|███▋ | 138080/371472 [10:58:10<18:25:07, 3.52it/s] 37%|███▋ | 138081/371472 [10:58:11<18:46:26, 3.45it/s] 37%|███▋ | 138082/371472 [10:58:11<18:03:08, 3.59it/s] 37%|███▋ | 138083/371472 [10:58:11<18:05:05, 3.58it/s] 37%|███▋ | 138084/371472 [10:58:12<18:22:26, 3.53it/s] 37%|███▋ | 138085/371472 [10:58:12<18:18:43, 3.54it/s] 37%|███▋ | 138086/371472 [10:58:12<19:16:17, 3.36it/s] 37%|███▋ | 138087/371472 [10:58:12<19:46:15, 3.28it/s] 37%|███▋ | 138088/371472 [10:58:13<18:27:56, 3.51it/s] 37%|███▋ | 138089/371472 [10:58:13<18:57:03, 3.42it/s] 37%|███▋ | 138090/371472 [10:58:13<19:36:05, 3.31it/s] 37%|███▋ | 138091/371472 [10:58:14<18:50:19, 3.44it/s] 37%|███▋ | 138092/371472 [10:58:14<18:13:39, 3.56it/s] 37%|███▋ | 138093/371472 [10:58:14<18:04:41, 3.59it/s] 37%|███▋ | 138094/371472 [10:58:14<18:32:47, 3.50it/s] 37%|███▋ | 138095/371472 [10:58:15<20:06:25, 3.22it/s] 37%|███▋ | 138096/371472 [10:58:15<18:53:07, 3.43it/s] 37%|███▋ | 138097/371472 [10:58:15<17:50:58, 3.63it/s] 37%|███▋ | 138098/371472 [10:58:16<18:06:22, 3.58it/s] 37%|███▋ | 138099/371472 [10:58:16<19:16:01, 3.36it/s] 37%|███▋ | 138100/371472 [10:58:16<18:42:38, 3.46it/s] {'loss': 3.1033, 'learning_rate': 6.657167790730245e-07, 'epoch': 5.95} + 37%|███▋ | 138100/371472 [10:58:16<18:42:38, 3.46it/s] 37%|███▋ | 138101/371472 [10:58:16<18:30:04, 3.50it/s] 37%|███▋ | 138102/371472 [10:58:17<19:17:46, 3.36it/s] 37%|███▋ | 138103/371472 [10:58:17<19:03:20, 3.40it/s] 37%|███▋ | 138104/371472 [10:58:17<18:23:12, 3.53it/s] 37%|███▋ | 138105/371472 [10:58:18<18:03:28, 3.59it/s] 37%|███▋ | 138106/371472 [10:58:18<19:27:04, 3.33it/s] 37%|███▋ | 138107/371472 [10:58:18<18:30:24, 3.50it/s] 37%|███▋ | 138108/371472 [10:58:19<18:44:57, 3.46it/s] 37%|███▋ | 138109/371472 [10:58:19<18:20:52, 3.53it/s] 37%|███▋ | 138110/371472 [10:58:19<17:32:35, 3.70it/s] 37%|███▋ | 138111/371472 [10:58:19<18:44:15, 3.46it/s] 37%|███▋ | 138112/371472 [10:58:20<17:50:48, 3.63it/s] 37%|███▋ | 138113/371472 [10:58:20<18:57:45, 3.42it/s] 37%|███▋ | 138114/371472 [10:58:20<18:50:56, 3.44it/s] 37%|███▋ | 138115/371472 [10:58:20<18:11:16, 3.56it/s] 37%|███▋ | 138116/371472 [10:58:21<18:39:55, 3.47it/s] 37%|███▋ | 138117/371472 [10:58:21<18:07:16, 3.58it/s] 37%|███▋ | 138118/371472 [10:58:21<18:13:38, 3.56it/s] 37%|███▋ | 138119/371472 [10:58:22<18:23:42, 3.52it/s] 37%|███▋ | 138120/371472 [10:58:22<18:02:03, 3.59it/s] {'loss': 2.9416, 'learning_rate': 6.656682970975457e-07, 'epoch': 5.95} + 37%|███▋ | 138120/371472 [10:58:22<18:02:03, 3.59it/s] 37%|███▋ | 138121/371472 [10:58:22<18:40:03, 3.47it/s] 37%|███▋ | 138122/371472 [10:58:22<18:05:00, 3.58it/s] 37%|███▋ | 138123/371472 [10:58:23<19:13:42, 3.37it/s] 37%|███▋ | 138124/371472 [10:58:23<18:25:05, 3.52it/s] 37%|███▋ | 138125/371472 [10:58:23<17:51:50, 3.63it/s] 37%|███▋ | 138126/371472 [10:58:24<17:34:12, 3.69it/s] 37%|███▋ | 138127/371472 [10:58:24<17:32:17, 3.70it/s] 37%|███▋ | 138128/371472 [10:58:24<16:49:31, 3.85it/s] 37%|███▋ | 138129/371472 [10:58:24<17:13:02, 3.76it/s] 37%|███▋ | 138130/371472 [10:58:25<17:24:47, 3.72it/s] 37%|███▋ | 138131/371472 [10:58:25<17:29:15, 3.71it/s] 37%|███▋ | 138132/371472 [10:58:25<17:54:01, 3.62it/s] 37%|███▋ | 138133/371472 [10:58:25<17:42:58, 3.66it/s] 37%|███▋ | 138134/371472 [10:58:26<17:22:42, 3.73it/s] 37%|███▋ | 138135/371472 [10:58:26<17:31:07, 3.70it/s] 37%|███▋ | 138136/371472 [10:58:26<17:10:53, 3.77it/s] 37%|███▋ | 138137/371472 [10:58:27<17:37:55, 3.68it/s] 37%|███▋ | 138138/371472 [10:58:27<18:00:36, 3.60it/s] 37%|███▋ | 138139/371472 [10:58:27<17:39:11, 3.67it/s] 37%|███▋ | 138140/371472 [10:58:27<18:24:06, 3.52it/s] {'loss': 3.1685, 'learning_rate': 6.656198151220669e-07, 'epoch': 5.95} + 37%|███▋ | 138140/371472 [10:58:27<18:24:06, 3.52it/s] 37%|███▋ | 138141/371472 [10:58:28<19:11:49, 3.38it/s] 37%|███▋ | 138142/371472 [10:58:28<19:49:39, 3.27it/s] 37%|███▋ | 138143/371472 [10:58:28<19:17:18, 3.36it/s] 37%|███▋ | 138144/371472 [10:58:29<18:56:10, 3.42it/s] 37%|███▋ | 138145/371472 [10:58:29<18:48:21, 3.45it/s] 37%|███▋ | 138146/371472 [10:58:29<18:24:18, 3.52it/s] 37%|███▋ | 138147/371472 [10:58:29<17:47:32, 3.64it/s] 37%|███▋ | 138148/371472 [10:58:30<17:51:50, 3.63it/s] 37%|███▋ | 138149/371472 [10:58:30<17:37:41, 3.68it/s] 37%|███▋ | 138150/371472 [10:58:30<17:34:15, 3.69it/s] 37%|███▋ | 138151/371472 [10:58:30<17:13:12, 3.76it/s] 37%|███▋ | 138152/371472 [10:58:31<17:19:41, 3.74it/s] 37%|███▋ | 138153/371472 [10:58:31<18:31:08, 3.50it/s] 37%|███▋ | 138154/371472 [10:58:31<18:35:48, 3.49it/s] 37%|███▋ | 138155/371472 [10:58:32<18:31:10, 3.50it/s] 37%|███▋ | 138156/371472 [10:58:32<18:25:57, 3.52it/s] 37%|███▋ | 138157/371472 [10:58:32<17:45:58, 3.65it/s] 37%|███▋ | 138158/371472 [10:58:32<18:14:00, 3.55it/s] 37%|███▋ | 138159/371472 [10:58:33<17:26:53, 3.71it/s] 37%|███▋ | 138160/371472 [10:58:33<17:04:23, 3.80it/s] {'loss': 3.1556, 'learning_rate': 6.65571333146588e-07, 'epoch': 5.95} + 37%|███▋ | 138160/371472 [10:58:33<17:04:23, 3.80it/s] 37%|███▋ | 138161/371472 [10:58:33<16:26:38, 3.94it/s] 37%|███▋ | 138162/371472 [10:58:34<18:01:33, 3.60it/s] 37%|███▋ | 138163/371472 [10:58:34<17:22:37, 3.73it/s] 37%|███▋ | 138164/371472 [10:58:34<17:01:19, 3.81it/s] 37%|███▋ | 138165/371472 [10:58:34<16:33:11, 3.92it/s] 37%|███▋ | 138166/371472 [10:58:35<16:30:40, 3.93it/s] 37%|███▋ | 138167/371472 [10:58:35<17:31:40, 3.70it/s] 37%|███▋ | 138168/371472 [10:58:35<17:42:52, 3.66it/s] 37%|███▋ | 138169/371472 [10:58:35<17:22:27, 3.73it/s] 37%|███▋ | 138170/371472 [10:58:36<17:37:47, 3.68it/s] 37%|███▋ | 138171/371472 [10:58:36<17:40:14, 3.67it/s] 37%|███▋ | 138172/371472 [10:58:36<18:08:54, 3.57it/s] 37%|███▋ | 138173/371472 [10:58:37<18:56:22, 3.42it/s] 37%|███▋ | 138174/371472 [10:58:37<18:47:09, 3.45it/s] 37%|███▋ | 138175/371472 [10:58:37<17:54:36, 3.62it/s] 37%|███▋ | 138176/371472 [10:58:37<17:17:16, 3.75it/s] 37%|███▋ | 138177/371472 [10:58:38<17:47:33, 3.64it/s] 37%|███▋ | 138178/371472 [10:58:38<17:56:09, 3.61it/s] 37%|███▋ | 138179/371472 [10:58:38<18:23:06, 3.52it/s] 37%|███▋ | 138180/371472 [10:58:38<18:57:18, 3.42it/s] {'loss': 3.2782, 'learning_rate': 6.65522851171109e-07, 'epoch': 5.95} + 37%|███▋ | 138180/371472 [10:58:38<18:57:18, 3.42it/s] 37%|███▋ | 138181/371472 [10:58:39<18:48:38, 3.44it/s] 37%|███▋ | 138182/371472 [10:58:39<19:33:34, 3.31it/s] 37%|███▋ | 138183/371472 [10:58:39<18:50:54, 3.44it/s] 37%|███▋ | 138184/371472 [10:58:40<18:01:45, 3.59it/s] 37%|███▋ | 138185/371472 [10:58:40<17:33:11, 3.69it/s] 37%|███▋ | 138186/371472 [10:58:40<16:45:43, 3.87it/s] 37%|███▋ | 138187/371472 [10:58:40<17:36:49, 3.68it/s] 37%|███▋ | 138188/371472 [10:58:41<17:22:53, 3.73it/s] 37%|███▋ | 138189/371472 [10:58:41<16:48:50, 3.85it/s] 37%|███▋ | 138190/371472 [10:58:41<20:17:32, 3.19it/s] 37%|███▋ | 138191/371472 [10:58:42<19:53:42, 3.26it/s] 37%|███▋ | 138192/371472 [10:58:42<19:01:57, 3.40it/s] 37%|███▋ | 138193/371472 [10:58:42<18:26:33, 3.51it/s] 37%|███▋ | 138194/371472 [10:58:42<17:55:21, 3.62it/s] 37%|███▋ | 138195/371472 [10:58:43<17:24:58, 3.72it/s] 37%|███▋ | 138196/371472 [10:58:43<17:24:37, 3.72it/s] 37%|███▋ | 138197/371472 [10:58:43<18:01:40, 3.59it/s] 37%|███▋ | 138198/371472 [10:58:44<18:04:03, 3.59it/s] 37%|███▋ | 138199/371472 [10:58:44<17:18:40, 3.74it/s] 37%|███▋ | 138200/371472 [10:58:44<17:06:20, 3.79it/s] {'loss': 3.0903, 'learning_rate': 6.654743691956302e-07, 'epoch': 5.95} + 37%|███▋ | 138200/371472 [10:58:44<17:06:20, 3.79it/s] 37%|███▋ | 138201/371472 [10:58:44<17:25:51, 3.72it/s] 37%|███▋ | 138202/371472 [10:58:45<18:52:29, 3.43it/s] 37%|███▋ | 138203/371472 [10:58:45<19:50:11, 3.27it/s] 37%|███▋ | 138204/371472 [10:58:45<18:44:16, 3.46it/s] 37%|███▋ | 138205/371472 [10:58:45<17:51:26, 3.63it/s] 37%|███▋ | 138206/371472 [10:58:46<17:23:56, 3.72it/s] 37%|███▋ | 138207/371472 [10:58:46<19:38:41, 3.30it/s] 37%|███▋ | 138208/371472 [10:58:46<19:05:22, 3.39it/s] 37%|███▋ | 138209/371472 [10:58:47<19:43:19, 3.29it/s] 37%|███▋ | 138210/371472 [10:58:47<19:01:27, 3.41it/s] 37%|███▋ | 138211/371472 [10:58:47<20:02:42, 3.23it/s] 37%|███▋ | 138212/371472 [10:58:48<18:58:53, 3.41it/s] 37%|███▋ | 138213/371472 [10:58:48<18:26:45, 3.51it/s] 37%|█��█▋ | 138214/371472 [10:58:48<17:43:06, 3.66it/s] 37%|███▋ | 138215/371472 [10:58:48<19:36:35, 3.30it/s] 37%|███▋ | 138216/371472 [10:58:49<19:03:04, 3.40it/s] 37%|███▋ | 138217/371472 [10:58:49<18:15:12, 3.55it/s] 37%|███▋ | 138218/371472 [10:58:49<18:03:37, 3.59it/s] 37%|███▋ | 138219/371472 [10:58:50<18:49:52, 3.44it/s] 37%|███▋ | 138220/371472 [10:58:50<17:49:11, 3.64it/s] {'loss': 3.2313, 'learning_rate': 6.654258872201513e-07, 'epoch': 5.95} + 37%|███▋ | 138220/371472 [10:58:50<17:49:11, 3.64it/s] 37%|███▋ | 138221/371472 [10:58:50<17:56:28, 3.61it/s] 37%|███▋ | 138222/371472 [10:58:50<17:29:36, 3.70it/s] 37%|███▋ | 138223/371472 [10:58:51<16:43:47, 3.87it/s] 37%|███▋ | 138224/371472 [10:58:51<16:25:56, 3.94it/s] 37%|███▋ | 138225/371472 [10:58:51<16:00:57, 4.05it/s] 37%|███▋ | 138226/371472 [10:58:51<17:25:04, 3.72it/s] 37%|███▋ | 138227/371472 [10:58:52<17:40:06, 3.67it/s] 37%|███▋ | 138228/371472 [10:58:52<20:32:38, 3.15it/s] 37%|███▋ | 138229/371472 [10:58:52<19:32:43, 3.31it/s] 37%|███▋ | 138230/371472 [10:58:53<18:30:28, 3.50it/s] 37%|███▋ | 138231/371472 [10:58:53<20:04:18, 3.23it/s] 37%|███▋ | 138232/371472 [10:58:53<18:57:48, 3.42it/s] 37%|███▋ | 138233/371472 [10:58:53<17:45:29, 3.65it/s] 37%|███▋ | 138234/371472 [10:58:54<18:37:51, 3.48it/s] 37%|███▋ | 138235/371472 [10:58:54<17:43:12, 3.66it/s] 37%|███▋ | 138236/371472 [10:58:54<17:54:36, 3.62it/s] 37%|███▋ | 138237/371472 [10:58:55<17:52:02, 3.63it/s] 37%|███▋ | 138238/371472 [10:58:55<17:36:24, 3.68it/s] 37%|███▋ | 138239/371472 [10:58:55<17:44:27, 3.65it/s] 37%|███▋ | 138240/371472 [10:58:55<17:28:58, 3.71it/s] {'loss': 3.079, 'learning_rate': 6.653774052446724e-07, 'epoch': 5.95} + 37%|███▋ | 138240/371472 [10:58:55<17:28:58, 3.71it/s] 37%|███▋ | 138241/371472 [10:58:56<17:54:35, 3.62it/s] 37%|███▋ | 138242/371472 [10:58:56<17:10:45, 3.77it/s] 37%|███▋ | 138243/371472 [10:58:56<17:09:52, 3.77it/s] 37%|███▋ | 138244/371472 [10:58:56<16:59:44, 3.81it/s] 37%|███▋ | 138245/371472 [10:58:57<17:31:53, 3.70it/s] 37%|███▋ | 138246/371472 [10:58:57<17:23:17, 3.73it/s] 37%|███▋ | 138247/371472 [10:58:57<17:35:47, 3.68it/s] 37%|███▋ | 138248/371472 [10:58:57<17:12:12, 3.77it/s] 37%|███▋ | 138249/371472 [10:58:58<17:14:29, 3.76it/s] 37%|███▋ | 138250/371472 [10:58:58<17:31:22, 3.70it/s] 37%|███▋ | 138251/371472 [10:58:58<17:56:00, 3.61it/s] 37%|███▋ | 138252/371472 [10:58:59<17:56:51, 3.61it/s] 37%|███▋ | 138253/371472 [10:58:59<17:57:20, 3.61it/s] 37%|███▋ | 138254/371472 [10:58:59<17:27:05, 3.71it/s] 37%|███▋ | 138255/371472 [10:58:59<17:02:59, 3.80it/s] 37%|███▋ | 138256/371472 [10:59:00<17:36:12, 3.68it/s] 37%|███▋ | 138257/371472 [10:59:00<17:25:04, 3.72it/s] 37%|███▋ | 138258/371472 [10:59:00<18:00:11, 3.60it/s] 37%|███▋ | 138259/371472 [10:59:01<17:50:50, 3.63it/s] 37%|███▋ | 138260/371472 [10:59:01<17:45:06, 3.65it/s] {'loss': 3.082, 'learning_rate': 6.653289232691934e-07, 'epoch': 5.96} + 37%|███▋ | 138260/371472 [10:59:01<17:45:06, 3.65it/s] 37%|███▋ | 138261/371472 [10:59:01<17:42:05, 3.66it/s] 37%|███▋ | 138262/371472 [10:59:01<18:30:51, 3.50it/s] 37%|███▋ | 138263/371472 [10:59:02<19:12:10, 3.37it/s] 37%|███▋ | 138264/371472 [10:59:02<18:30:48, 3.50it/s] 37%|███▋ | 138265/371472 [10:59:02<17:47:16, 3.64it/s] 37%|███▋ | 138266/371472 [10:59:02<17:33:28, 3.69it/s] 37%|███▋ | 138267/371472 [10:59:03<17:02:26, 3.80it/s] 37%|███▋ | 138268/371472 [10:59:03<18:26:03, 3.51it/s] 37%|███▋ | 138269/371472 [10:59:03<18:58:30, 3.41it/s] 37%|███▋ | 138270/371472 [10:59:04<18:58:36, 3.41it/s] 37%|███▋ | 138271/371472 [10:59:04<18:55:29, 3.42it/s] 37%|███▋ | 138272/371472 [10:59:04<18:44:54, 3.46it/s] 37%|███▋ | 138273/371472 [10:59:04<18:13:42, 3.55it/s] 37%|███▋ | 138274/371472 [10:59:05<18:41:17, 3.47it/s] 37%|███▋ | 138275/371472 [10:59:05<17:46:35, 3.64it/s] 37%|███▋ | 138276/371472 [10:59:05<19:23:27, 3.34it/s] 37%|███▋ | 138277/371472 [10:59:06<18:44:15, 3.46it/s] 37%|███▋ | 138278/371472 [10:59:06<19:46:41, 3.28it/s] 37%|███▋ | 138279/371472 [10:59:06<19:38:52, 3.30it/s] 37%|███▋ | 138280/371472 [10:59:07<18:55:42, 3.42it/s] {'loss': 3.1584, 'learning_rate': 6.652804412937146e-07, 'epoch': 5.96} + 37%|███▋ | 138280/371472 [10:59:07<18:55:42, 3.42it/s] 37%|███▋ | 138281/371472 [10:59:07<18:13:44, 3.55it/s] 37%|███▋ | 138282/371472 [10:59:07<17:40:59, 3.66it/s] 37%|███▋ | 138283/371472 [10:59:07<17:26:42, 3.71it/s] 37%|███▋ | 138284/371472 [10:59:08<17:42:36, 3.66it/s] 37%|███▋ | 138285/371472 [10:59:08<17:52:44, 3.62it/s] 37%|███▋ | 138286/371472 [10:59:08<18:08:21, 3.57it/s] 37%|███▋ | 138287/371472 [10:59:09<19:00:33, 3.41it/s] 37%|███▋ | 138288/371472 [10:59:09<18:33:59, 3.49it/s] 37%|███▋ | 138289/371472 [10:59:09<17:59:26, 3.60it/s] 37%|███▋ | 138290/371472 [10:59:09<19:16:45, 3.36it/s] 37%|███▋ | 138291/371472 [10:59:10<18:15:02, 3.55it/s] 37%|███▋ | 138292/371472 [10:59:10<17:28:39, 3.71it/s] 37%|███▋ | 138293/371472 [10:59:10<17:22:05, 3.73it/s] 37%|███▋ | 138294/371472 [10:59:10<17:20:00, 3.74it/s] 37%|███▋ | 138295/371472 [10:59:11<17:56:41, 3.61it/s] 37%|███▋ | 138296/371472 [10:59:11<17:08:58, 3.78it/s] 37%|███▋ | 138297/371472 [10:59:11<17:02:09, 3.80it/s] 37%|███▋ | 138298/371472 [10:59:11<17:30:23, 3.70it/s] 37%|███▋ | 138299/371472 [10:59:12<16:55:20, 3.83it/s] 37%|███▋ | 138300/371472 [10:59:12<17:21:01, 3.73it/s] {'loss': 3.1979, 'learning_rate': 6.652319593182357e-07, 'epoch': 5.96} + 37%|███▋ | 138300/371472 [10:59:12<17:21:01, 3.73it/s] 37%|███▋ | 138301/371472 [10:59:12<18:18:03, 3.54it/s] 37%|███▋ | 138302/371472 [10:59:13<18:03:53, 3.59it/s] 37%|███▋ | 138303/371472 [10:59:13<19:19:13, 3.35it/s] 37%|███▋ | 138304/371472 [10:59:13<18:12:54, 3.56it/s] 37%|███▋ | 138305/371472 [10:59:13<17:38:09, 3.67it/s] 37%|███▋ | 138306/371472 [10:59:14<18:13:59, 3.55it/s] 37%|███▋ | 138307/371472 [10:59:14<17:19:55, 3.74it/s] 37%|███▋ | 138308/371472 [10:59:14<17:07:28, 3.78it/s] 37%|███▋ | 138309/371472 [10:59:15<19:17:39, 3.36it/s] 37%|███▋ | 138310/371472 [10:59:15<18:33:11, 3.49it/s] 37%|███▋ | 138311/371472 [10:59:15<18:50:22, 3.44it/s] 37%|███▋ | 138312/371472 [10:59:15<17:58:08, 3.60it/s] 37%|███▋ | 138313/371472 [10:59:16<17:26:52, 3.71it/s] 37%|███▋ | 138314/371472 [10:59:16<17:51:02, 3.63it/s] 37%|███▋ | 138315/371472 [10:59:16<17:34:45, 3.68it/s] 37%|███▋ | 138316/371472 [10:59:16<17:14:00, 3.76it/s] 37%|███▋ | 138317/371472 [10:59:17<17:50:58, 3.63it/s] 37%|███▋ | 138318/371472 [10:59:17<17:56:43, 3.61it/s] 37%|███▋ | 138319/371472 [10:59:17<17:37:51, 3.67it/s] 37%|███▋ | 138320/371472 [10:59:18<17:36:52, 3.68it/s] {'loss': 3.1629, 'learning_rate': 6.651834773427567e-07, 'epoch': 5.96} + 37%|███▋ | 138320/371472 [10:59:18<17:36:52, 3.68it/s] 37%|███▋ | 138321/371472 [10:59:18<17:10:38, 3.77it/s] 37%|███▋ | 138322/371472 [10:59:18<16:52:27, 3.84it/s] 37%|███▋ | 138323/371472 [10:59:18<16:58:06, 3.82it/s] 37%|███▋ | 138324/371472 [10:59:19<18:06:32, 3.58it/s] 37%|███▋ | 138325/371472 [10:59:19<17:56:27, 3.61it/s] 37%|███▋ | 138326/371472 [10:59:19<17:33:26, 3.69it/s] 37%|███▋ | 138327/371472 [10:59:19<18:08:59, 3.57it/s] 37%|███▋ | 138328/371472 [10:59:20<17:34:07, 3.69it/s] 37%|███▋ | 138329/371472 [10:59:20<18:02:22, 3.59it/s] 37%|███▋ | 138330/371472 [10:59:20<17:29:33, 3.70it/s] 37%|███▋ | 138331/371472 [10:59:21<18:30:34, 3.50it/s] 37%|███▋ | 138332/371472 [10:59:21<17:56:34, 3.61it/s] 37%|███▋ | 138333/371472 [10:59:21<17:43:52, 3.65it/s] 37%|███▋ | 138334/371472 [10:59:21<17:55:42, 3.61it/s] 37%|███▋ | 138335/371472 [10:59:22<17:54:55, 3.61it/s] 37%|███▋ | 138336/371472 [10:59:22<17:51:13, 3.63it/s] 37%|███▋ | 138337/371472 [10:59:22<18:01:42, 3.59it/s] 37%|███▋ | 138338/371472 [10:59:23<18:02:52, 3.59it/s] 37%|███▋ | 138339/371472 [10:59:23<17:29:56, 3.70it/s] 37%|███▋ | 138340/371472 [10:59:23<17:29:33, 3.70it/s] {'loss': 3.0764, 'learning_rate': 6.651349953672778e-07, 'epoch': 5.96} + 37%|███▋ | 138340/371472 [10:59:23<17:29:33, 3.70it/s] 37%|███▋ | 138341/371472 [10:59:23<17:40:30, 3.66it/s] 37%|███▋ | 138342/371472 [10:59:24<17:29:19, 3.70it/s] 37%|███▋ | 138343/371472 [10:59:24<17:48:02, 3.64it/s] 37%|███▋ | 138344/371472 [10:59:24<19:12:05, 3.37it/s] 37%|███▋ | 138345/371472 [10:59:24<18:42:49, 3.46it/s] 37%|███▋ | 138346/371472 [10:59:25<18:14:45, 3.55it/s] 37%|███▋ | 138347/371472 [10:59:25<18:11:16, 3.56it/s] 37%|███▋ | 138348/371472 [10:59:25<17:25:47, 3.72it/s] 37%|███▋ | 138349/371472 [10:59:26<17:31:24, 3.70it/s] 37%|███▋ | 138350/371472 [10:59:26<17:58:17, 3.60it/s] 37%|███▋ | 138351/371472 [10:59:26<18:03:51, 3.58it/s] 37%|███▋ | 138352/371472 [10:59:26<18:13:52, 3.55it/s] 37%|███▋ | 138353/371472 [10:59:27<17:50:09, 3.63it/s] 37%|███▋ | 138354/371472 [10:59:27<19:49:55, 3.27it/s] 37%|███▋ | 138355/371472 [10:59:27<18:52:38, 3.43it/s] 37%|███▋ | 138356/371472 [10:59:28<18:46:06, 3.45it/s] 37%|███▋ | 138357/371472 [10:59:28<19:02:10, 3.40it/s] 37%|███▋ | 138358/371472 [10:59:28<19:02:23, 3.40it/s] 37%|███▋ | 138359/371472 [10:59:28<18:52:38, 3.43it/s] 37%|███▋ | 138360/371472 [10:59:29<18:54:08, 3.43it/s] {'loss': 3.0662, 'learning_rate': 6.65086513391799e-07, 'epoch': 5.96} + 37%|███▋ | 138360/371472 [10:59:29<18:54:08, 3.43it/s] 37%|███▋ | 138361/371472 [10:59:29<18:37:44, 3.48it/s] 37%|███▋ | 138362/371472 [10:59:29<17:54:30, 3.62it/s] 37%|███▋ | 138363/371472 [10:59:30<18:37:04, 3.48it/s] 37%|███▋ | 138364/371472 [10:59:30<18:45:56, 3.45it/s] 37%|███▋ | 138365/371472 [10:59:30<18:27:56, 3.51it/s] 37%|███▋ | 138366/371472 [10:59:30<18:15:19, 3.55it/s] 37%|███▋ | 138367/371472 [10:59:31<19:41:09, 3.29it/s] 37%|███▋ | 138368/371472 [10:59:31<18:42:18, 3.46it/s] 37%|███▋ | 138369/371472 [10:59:31<18:43:06, 3.46it/s] 37%|███▋ | 138370/371472 [10:59:32<18:45:18, 3.45it/s] 37%|███▋ | 138371/371472 [10:59:32<17:46:13, 3.64it/s] 37%|███▋ | 138372/371472 [10:59:32<17:12:45, 3.76it/s] 37%|███▋ | 138373/371472 [10:59:32<17:00:29, 3.81it/s] 37%|███▋ | 138374/371472 [10:59:33<16:54:27, 3.83it/s] 37%|███▋ | 138375/371472 [10:59:33<17:31:46, 3.69it/s] 37%|███▋ | 138376/371472 [10:59:33<17:41:22, 3.66it/s] 37%|███▋ | 138377/371472 [10:59:33<17:45:40, 3.65it/s] 37%|███▋ | 138378/371472 [10:59:34<17:37:40, 3.67it/s] 37%|███▋ | 138379/371472 [10:59:34<18:12:13, 3.56it/s] 37%|███▋ | 138380/371472 [10:59:34<17:52:34, 3.62it/s] {'loss': 3.3012, 'learning_rate': 6.650380314163201e-07, 'epoch': 5.96} + 37%|███▋ | 138380/371472 [10:59:34<17:52:34, 3.62it/s] 37%|███▋ | 138381/371472 [10:59:35<17:50:24, 3.63it/s] 37%|███▋ | 138382/371472 [10:59:35<18:29:33, 3.50it/s] 37%|███▋ | 138383/371472 [10:59:35<19:46:41, 3.27it/s] 37%|███▋ | 138384/371472 [10:59:36<21:15:25, 3.05it/s] 37%|███▋ | 138385/371472 [10:59:36<19:51:10, 3.26it/s] 37%|███▋ | 138386/371472 [10:59:36<19:18:45, 3.35it/s] 37%|███▋ | 138387/371472 [10:59:36<18:15:14, 3.55it/s] 37%|███▋ | 138388/371472 [10:59:37<17:46:23, 3.64it/s] 37%|███▋ | 138389/371472 [10:59:37<17:54:46, 3.61it/s] 37%|███▋ | 138390/371472 [10:59:37<17:25:14, 3.72it/s] 37%|███▋ | 138391/371472 [10:59:38<18:20:06, 3.53it/s] 37%|███▋ | 138392/371472 [10:59:38<17:46:22, 3.64it/s] 37%|███▋ | 138393/371472 [10:59:38<17:12:44, 3.76it/s] 37%|███▋ | 138394/371472 [10:59:38<17:39:10, 3.67it/s] 37%|███▋ | 138395/371472 [10:59:39<18:12:16, 3.56it/s] 37%|███▋ | 138396/371472 [10:59:39<19:09:19, 3.38it/s] 37%|███▋ | 138397/371472 [10:59:39<18:16:52, 3.54it/s] 37%|███▋ | 138398/371472 [10:59:40<18:39:46, 3.47it/s] 37%|███▋ | 138399/371472 [10:59:40<18:10:02, 3.56it/s] 37%|███▋ | 138400/371472 [10:59:40<17:24:50, 3.72it/s] {'loss': 3.1651, 'learning_rate': 6.649895494408411e-07, 'epoch': 5.96} + 37%|███▋ | 138400/371472 [10:59:40<17:24:50, 3.72it/s] 37%|███▋ | 138401/371472 [10:59:40<17:22:03, 3.73it/s] 37%|███▋ | 138402/371472 [10:59:41<18:09:07, 3.57it/s] 37%|███▋ | 138403/371472 [10:59:41<17:20:13, 3.73it/s] 37%|██��▋ | 138404/371472 [10:59:41<17:20:54, 3.73it/s] 37%|███▋ | 138405/371472 [10:59:41<17:18:44, 3.74it/s] 37%|███▋ | 138406/371472 [10:59:42<17:01:40, 3.80it/s] 37%|███▋ | 138407/371472 [10:59:42<17:12:30, 3.76it/s] 37%|███▋ | 138408/371472 [10:59:42<17:00:59, 3.80it/s] 37%|███▋ | 138409/371472 [10:59:42<16:44:33, 3.87it/s] 37%|███▋ | 138410/371472 [10:59:43<18:17:13, 3.54it/s] 37%|███▋ | 138411/371472 [10:59:43<18:39:31, 3.47it/s] 37%|███▋ | 138412/371472 [10:59:43<18:13:27, 3.55it/s] 37%|███▋ | 138413/371472 [10:59:44<17:47:10, 3.64it/s] 37%|███▋ | 138414/371472 [10:59:44<17:10:08, 3.77it/s] 37%|███▋ | 138415/371472 [10:59:44<17:01:58, 3.80it/s] 37%|███▋ | 138416/371472 [10:59:44<17:18:08, 3.74it/s] 37%|███▋ | 138417/371472 [10:59:45<17:14:42, 3.75it/s] 37%|███▋ | 138418/371472 [10:59:45<16:40:05, 3.88it/s] 37%|███▋ | 138419/371472 [10:59:45<17:01:15, 3.80it/s] 37%|███▋ | 138420/371472 [10:59:45<16:55:27, 3.83it/s] {'loss': 3.2542, 'learning_rate': 6.649410674653623e-07, 'epoch': 5.96} + 37%|███▋ | 138420/371472 [10:59:45<16:55:27, 3.83it/s] 37%|███▋ | 138421/371472 [10:59:46<17:23:20, 3.72it/s] 37%|███▋ | 138422/371472 [10:59:46<16:52:19, 3.84it/s] 37%|███▋ | 138423/371472 [10:59:46<17:56:10, 3.61it/s] 37%|███▋ | 138424/371472 [10:59:46<17:27:37, 3.71it/s] 37%|███▋ | 138425/371472 [10:59:47<17:20:12, 3.73it/s] 37%|███▋ | 138426/371472 [10:59:47<17:22:12, 3.73it/s] 37%|███▋ | 138427/371472 [10:59:47<17:09:35, 3.77it/s] 37%|███▋ | 138428/371472 [10:59:48<19:41:26, 3.29it/s] 37%|███▋ | 138429/371472 [10:59:48<21:10:59, 3.06it/s] 37%|███▋ | 138430/371472 [10:59:48<20:39:56, 3.13it/s] 37%|███▋ | 138431/371472 [10:59:49<19:52:42, 3.26it/s] 37%|███▋ | 138432/371472 [10:59:49<20:15:00, 3.20it/s] 37%|███▋ | 138433/371472 [10:59:49<19:35:48, 3.30it/s] 37%|███▋ | 138434/371472 [10:59:49<18:47:55, 3.44it/s] 37%|███▋ | 138435/371472 [10:59:50<18:18:47, 3.53it/s] 37%|███▋ | 138436/371472 [10:59:50<18:42:12, 3.46it/s] 37%|███▋ | 138437/371472 [10:59:50<18:16:35, 3.54it/s] 37%|███▋ | 138438/371472 [10:59:51<17:28:42, 3.70it/s] 37%|███▋ | 138439/371472 [10:59:51<17:40:00, 3.66it/s] 37%|███▋ | 138440/371472 [10:59:51<17:19:34, 3.74it/s] {'loss': 2.9693, 'learning_rate': 6.648925854898834e-07, 'epoch': 5.96} + 37%|███▋ | 138440/371472 [10:59:51<17:19:34, 3.74it/s] 37%|███▋ | 138441/371472 [10:59:51<16:44:23, 3.87it/s] 37%|███▋ | 138442/371472 [10:59:52<16:42:26, 3.87it/s] 37%|███▋ | 138443/371472 [10:59:52<18:38:38, 3.47it/s] 37%|███▋ | 138444/371472 [10:59:52<18:05:24, 3.58it/s] 37%|███▋ | 138445/371472 [10:59:52<17:48:44, 3.63it/s] 37%|███▋ | 138446/371472 [10:59:53<17:22:37, 3.72it/s] 37%|███▋ | 138447/371472 [10:59:53<18:28:58, 3.50it/s] 37%|███▋ | 138448/371472 [10:59:53<17:45:38, 3.64it/s] 37%|███▋ | 138449/371472 [10:59:54<17:34:17, 3.68it/s] 37%|███▋ | 138450/371472 [10:59:54<17:22:09, 3.73it/s] 37%|███▋ | 138451/371472 [10:59:54<17:23:02, 3.72it/s] 37%|███▋ | 138452/371472 [10:59:54<18:09:44, 3.56it/s] 37%|███▋ | 138453/371472 [10:59:55<19:32:53, 3.31it/s] 37%|███▋ | 138454/371472 [10:59:55<18:09:51, 3.56it/s] 37%|███▋ | 138455/371472 [10:59:55<18:10:45, 3.56it/s] 37%|███▋ | 138456/371472 [10:59:56<17:49:13, 3.63it/s] 37%|███▋ | 138457/371472 [10:59:56<18:52:09, 3.43it/s] 37%|███▋ | 138458/371472 [10:59:56<19:42:13, 3.28it/s] 37%|███▋ | 138459/371472 [10:59:56<19:01:33, 3.40it/s] 37%|███▋ | 138460/371472 [10:59:57<18:10:39, 3.56it/s] {'loss': 3.0111, 'learning_rate': 6.648441035144045e-07, 'epoch': 5.96} + 37%|███▋ | 138460/371472 [10:59:57<18:10:39, 3.56it/s] 37%|███▋ | 138461/371472 [10:59:57<17:21:48, 3.73it/s] 37%|███▋ | 138462/371472 [10:59:57<18:34:09, 3.49it/s] 37%|███▋ | 138463/371472 [10:59:57<17:25:34, 3.71it/s] 37%|███▋ | 138464/371472 [10:59:58<17:21:52, 3.73it/s] 37%|███▋ | 138465/371472 [10:59:58<17:31:08, 3.69it/s] 37%|███▋ | 138466/371472 [10:59:58<18:32:44, 3.49it/s] 37%|███▋ | 138467/371472 [10:59:59<18:26:18, 3.51it/s] 37%|███▋ | 138468/371472 [10:59:59<18:39:28, 3.47it/s] 37%|███▋ | 138469/371472 [10:59:59<18:19:41, 3.53it/s] 37%|███▋ | 138470/371472 [10:59:59<18:12:55, 3.55it/s] 37%|███▋ | 138471/371472 [11:00:00<18:43:25, 3.46it/s] 37%|███▋ | 138472/371472 [11:00:00<18:26:42, 3.51it/s] 37%|███▋ | 138473/371472 [11:00:00<17:37:10, 3.67it/s] 37%|███▋ | 138474/371472 [11:00:01<17:54:57, 3.61it/s] 37%|███▋ | 138475/371472 [11:00:01<18:28:53, 3.50it/s] 37%|███▋ | 138476/371472 [11:00:01<18:15:02, 3.55it/s] 37%|███▋ | 138477/371472 [11:00:01<17:47:10, 3.64it/s] 37%|███▋ | 138478/371472 [11:00:02<17:38:06, 3.67it/s] 37%|███▋ | 138479/371472 [11:00:02<17:11:27, 3.76it/s] 37%|███▋ | 138480/371472 [11:00:02<16:29:21, 3.92it/s] {'loss': 3.1406, 'learning_rate': 6.647956215389255e-07, 'epoch': 5.96} + 37%|███▋ | 138480/371472 [11:00:02<16:29:21, 3.92it/s] 37%|███▋ | 138481/371472 [11:00:02<17:08:55, 3.77it/s] 37%|███▋ | 138482/371472 [11:00:03<17:06:13, 3.78it/s] 37%|███▋ | 138483/371472 [11:00:03<17:20:54, 3.73it/s] 37%|███▋ | 138484/371472 [11:00:03<17:48:36, 3.63it/s] 37%|███▋ | 138485/371472 [11:00:04<18:14:50, 3.55it/s] 37%|███▋ | 138486/371472 [11:00:04<17:42:29, 3.65it/s] 37%|███▋ | 138487/371472 [11:00:04<16:57:37, 3.82it/s] 37%|███▋ | 138488/371472 [11:00:04<18:31:43, 3.49it/s] 37%|███▋ | 138489/371472 [11:00:05<19:44:43, 3.28it/s] 37%|███▋ | 138490/371472 [11:00:05<18:33:48, 3.49it/s] 37%|███▋ | 138491/371472 [11:00:05<18:56:44, 3.42it/s] 37%|███▋ | 138492/371472 [11:00:06<18:27:44, 3.51it/s] 37%|███▋ | 138493/371472 [11:00:06<18:22:35, 3.52it/s] 37%|███▋ | 138494/371472 [11:00:06<20:40:19, 3.13it/s] 37%|███▋ | 138495/371472 [11:00:07<19:31:29, 3.31it/s] 37%|███▋ | 138496/371472 [11:00:07<19:01:15, 3.40it/s] 37%|███▋ | 138497/371472 [11:00:07<18:58:19, 3.41it/s] 37%|███▋ | 138498/371472 [11:00:07<20:13:16, 3.20it/s] 37%|███▋ | 138499/371472 [11:00:08<18:32:56, 3.49it/s] 37%|███▋ | 138500/371472 [11:00:08<17:39:23, 3.67it/s] {'loss': 3.0596, 'learning_rate': 6.647471395634467e-07, 'epoch': 5.97} + 37%|███▋ | 138500/371472 [11:00:08<17:39:23, 3.67it/s] 37%|███▋ | 138501/371472 [11:00:08<18:41:22, 3.46it/s] 37%|███▋ | 138502/371472 [11:00:09<17:50:45, 3.63it/s] 37%|███▋ | 138503/371472 [11:00:09<19:01:02, 3.40it/s] 37%|███▋ | 138504/371472 [11:00:09<18:18:33, 3.53it/s] 37%|███▋ | 138505/371472 [11:00:09<18:38:15, 3.47it/s] 37%|███▋ | 138506/371472 [11:00:10<18:36:09, 3.48it/s] 37%|███▋ | 138507/371472 [11:00:10<20:15:26, 3.19it/s] 37%|███▋ | 138508/371472 [11:00:10<19:13:09, 3.37it/s] 37%|███▋ | 138509/371472 [11:00:11<18:28:30, 3.50it/s] 37%|███▋ | 138510/371472 [11:00:11<18:24:38, 3.51it/s] 37%|███▋ | 138511/371472 [11:00:11<19:21:15, 3.34it/s] 37%|███▋ | 138512/371472 [11:00:12<19:45:27, 3.28it/s] 37%|███▋ | 138513/371472 [11:00:12<19:29:12, 3.32it/s] 37%|███▋ | 138514/371472 [11:00:12<18:52:56, 3.43it/s] 37%|███▋ | 138515/371472 [11:00:12<19:19:05, 3.35it/s] 37%|███▋ | 138516/371472 [11:00:13<19:01:27, 3.40it/s] 37%|███▋ | 138517/371472 [11:00:13<18:12:25, 3.55it/s] 37%|███▋ | 138518/371472 [11:00:13<18:04:24, 3.58it/s] 37%|███▋ | 138519/371472 [11:00:13<17:34:51, 3.68it/s] 37%|███▋ | 138520/371472 [11:00:14<17:32:40, 3.69it/s] {'loss': 3.0972, 'learning_rate': 6.646986575879679e-07, 'epoch': 5.97} + 37%|███▋ | 138520/371472 [11:00:14<17:32:40, 3.69it/s] 37%|███▋ | 138521/371472 [11:00:14<17:03:57, 3.79it/s] 37%|███▋ | 138522/371472 [11:00:14<17:32:14, 3.69it/s] 37%|███▋ | 138523/371472 [11:00:15<17:30:19, 3.70it/s] 37%|███▋ | 138524/371472 [11:00:15<17:30:22, 3.70it/s] 37%|███▋ | 138525/371472 [11:00:15<17:28:28, 3.70it/s] 37%|███▋ | 138526/371472 [11:00:15<18:23:41, 3.52it/s] 37%|███▋ | 138527/371472 [11:00:16<18:04:26, 3.58it/s] 37%|███▋ | 138528/371472 [11:00:16<17:59:59, 3.59it/s] 37%|███▋ | 138529/371472 [11:00:16<17:52:43, 3.62it/s] 37%|███▋ | 138530/371472 [11:00:17<18:28:36, 3.50it/s] 37%|███▋ | 138531/371472 [11:00:17<18:17:26, 3.54it/s] 37%|███▋ | 138532/371472 [11:00:17<17:43:41, 3.65it/s] 37%|███▋ | 138533/371472 [11:00:17<17:18:22, 3.74it/s] 37%|███▋ | 138534/371472 [11:00:18<17:59:45, 3.60it/s] 37%|███▋ | 138535/371472 [11:00:18<17:39:56, 3.66it/s] 37%|███▋ | 138536/371472 [11:00:18<20:28:55, 3.16it/s] 37%|███▋ | 138537/371472 [11:00:19<19:53:32, 3.25it/s] 37%|███▋ | 138538/371472 [11:00:19<21:10:15, 3.06it/s] 37%|███▋ | 138539/371472 [11:00:19<20:22:35, 3.18it/s] 37%|███▋ | 138540/371472 [11:00:19<19:22:55, 3.34it/s] {'loss': 3.3605, 'learning_rate': 6.64650175612489e-07, 'epoch': 5.97} + 37%|███▋ | 138540/371472 [11:00:19<19:22:55, 3.34it/s] 37%|███▋ | 138541/371472 [11:00:20<18:37:42, 3.47it/s] 37%|███▋ | 138542/371472 [11:00:20<18:26:27, 3.51it/s] 37%|███▋ | 138543/371472 [11:00:20<18:09:35, 3.56it/s] 37%|███▋ | 138544/371472 [11:00:21<19:08:40, 3.38it/s] 37%|███▋ | 138545/371472 [11:00:21<18:21:30, 3.52it/s] 37%|███▋ | 138546/371472 [11:00:21<19:00:40, 3.40it/s] 37%|███▋ | 138547/371472 [11:00:21<19:03:22, 3.40it/s] 37%|███▋ | 138548/371472 [11:00:22<19:41:18, 3.29it/s] 37%|███▋ | 138549/371472 [11:00:22<18:15:18, 3.54it/s] 37%|███▋ | 138550/371472 [11:00:22<18:17:38, 3.54it/s] 37%|███▋ | 138551/371472 [11:00:23<19:53:49, 3.25it/s] 37%|███▋ | 138552/371472 [11:00:23<20:16:51, 3.19it/s] 37%|███▋ | 138553/371472 [11:00:23<19:07:07, 3.38it/s] 37%|███▋ | 138554/371472 [11:00:24<18:42:58, 3.46it/s] 37%|███▋ | 138555/371472 [11:00:24<18:11:05, 3.56it/s] 37%|███▋ | 138556/371472 [11:00:24<17:57:37, 3.60it/s] 37%|███▋ | 138557/371472 [11:00:24<19:20:57, 3.34it/s] 37%|███▋ | 138558/371472 [11:00:25<18:05:25, 3.58it/s] 37%|███▋ | 138559/371472 [11:00:25<18:10:56, 3.56it/s] 37%|███▋ | 138560/371472 [11:00:25<17:45:59, 3.64it/s] {'loss': 3.0885, 'learning_rate': 6.6460169363701e-07, 'epoch': 5.97} + 37%|███▋ | 138560/371472 [11:00:25<17:45:59, 3.64it/s] 37%|███▋ | 138561/371472 [11:00:26<20:32:14, 3.15it/s] 37%|███▋ | 138562/371472 [11:00:26<20:34:21, 3.14it/s] 37%|███▋ | 138563/371472 [11:00:26<21:02:57, 3.07it/s] 37%|███▋ | 138564/371472 [11:00:27<19:54:23, 3.25it/s] 37%|███▋ | 138565/371472 [11:00:27<21:27:58, 3.01it/s] 37%|███▋ | 138566/371472 [11:00:27<20:43:15, 3.12it/s] 37%|███▋ | 138567/371472 [11:00:27<19:15:46, 3.36it/s] 37%|███▋ | 138568/371472 [11:00:28<19:09:23, 3.38it/s] 37%|███▋ | 138569/371472 [11:00:28<18:44:44, 3.45it/s] 37%|███▋ | 138570/371472 [11:00:28<18:21:07, 3.53it/s] 37%|███▋ | 138571/371472 [11:00:29<18:32:22, 3.49it/s] 37%|███▋ | 138572/371472 [11:00:29<17:35:56, 3.68it/s] 37%|███▋ | 138573/371472 [11:00:29<17:12:34, 3.76it/s] 37%|███▋ | 138574/371472 [11:00:29<19:01:15, 3.40it/s] 37%|███▋ | 138575/371472 [11:00:30<18:11:38, 3.56it/s] 37%|███▋ | 138576/371472 [11:00:30<18:18:21, 3.53it/s] 37%|███▋ | 138577/371472 [11:00:30<17:38:16, 3.67it/s] 37%|███▋ | 138578/371472 [11:00:31<17:33:57, 3.68it/s] 37%|███▋ | 138579/371472 [11:00:31<17:37:55, 3.67it/s] 37%|███▋ | 138580/371472 [11:00:31<17:18:06, 3.74it/s] {'loss': 3.2058, 'learning_rate': 6.645532116615311e-07, 'epoch': 5.97} + 37%|███▋ | 138580/371472 [11:00:31<17:18:06, 3.74it/s] 37%|███▋ | 138581/371472 [11:00:31<16:49:14, 3.85it/s] 37%|███▋ | 138582/371472 [11:00:32<18:01:53, 3.59it/s] 37%|███▋ | 138583/371472 [11:00:32<20:10:39, 3.21it/s] 37%|███▋ | 138584/371472 [11:00:32<18:48:28, 3.44it/s] 37%|███▋ | 138585/371472 [11:00:33<18:51:20, 3.43it/s] 37%|███▋ | 138586/371472 [11:00:33<18:44:16, 3.45it/s] 37%|███▋ | 138587/371472 [11:00:33<18:12:43, 3.55it/s] 37%|███▋ | 138588/371472 [11:00:33<17:26:06, 3.71it/s] 37%|███▋ | 138589/371472 [11:00:34<17:05:39, 3.78it/s] 37%|███▋ | 138590/371472 [11:00:34<16:54:01, 3.83it/s] 37%|███▋ | 138591/371472 [11:00:34<16:37:52, 3.89it/s] 37%|███▋ | 138592/371472 [11:00:34<16:27:15, 3.93it/s] 37%|███▋ | 138593/371472 [11:00:35<18:13:19, 3.55it/s] 37%|███▋ | 138594/371472 [11:00:35<19:04:42, 3.39it/s] 37%|███▋ | 138595/371472 [11:00:35<18:20:52, 3.53it/s] 37%|███▋ | 138596/371472 [11:00:35<17:32:10, 3.69it/s] 37%|██���▋ | 138597/371472 [11:00:36<18:49:29, 3.44it/s] 37%|███▋ | 138598/371472 [11:00:36<18:14:06, 3.55it/s] 37%|███▋ | 138599/371472 [11:00:36<17:35:36, 3.68it/s] 37%|███▋ | 138600/371472 [11:00:37<16:44:12, 3.86it/s] {'loss': 3.2432, 'learning_rate': 6.645047296860523e-07, 'epoch': 5.97} + 37%|███▋ | 138600/371472 [11:00:37<16:44:12, 3.86it/s] 37%|███▋ | 138601/371472 [11:00:37<16:47:57, 3.85it/s] 37%|███▋ | 138602/371472 [11:00:37<17:21:23, 3.73it/s] 37%|███▋ | 138603/371472 [11:00:37<18:35:29, 3.48it/s] 37%|███▋ | 138604/371472 [11:00:38<20:35:17, 3.14it/s] 37%|███▋ | 138605/371472 [11:00:38<19:08:56, 3.38it/s] 37%|███▋ | 138606/371472 [11:00:38<20:47:25, 3.11it/s] 37%|███▋ | 138607/371472 [11:00:39<20:58:54, 3.08it/s] 37%|███▋ | 138608/371472 [11:00:39<20:53:26, 3.10it/s] 37%|███▋ | 138609/371472 [11:00:39<20:33:58, 3.15it/s] 37%|███▋ | 138610/371472 [11:00:40<20:11:36, 3.20it/s] 37%|███▋ | 138611/371472 [11:00:40<18:48:41, 3.44it/s] 37%|███▋ | 138612/371472 [11:00:40<19:14:24, 3.36it/s] 37%|███▋ | 138613/371472 [11:00:41<18:29:25, 3.50it/s] 37%|███▋ | 138614/371472 [11:00:41<18:35:45, 3.48it/s] 37%|███▋ | 138615/371472 [11:00:41<18:30:28, 3.49it/s] 37%|███▋ | 138616/371472 [11:00:41<18:09:57, 3.56it/s] 37%|███▋ | 138617/371472 [11:00:42<18:52:39, 3.43it/s] 37%|███▋ | 138618/371472 [11:00:42<18:34:17, 3.48it/s] 37%|███▋ | 138619/371472 [11:00:42<17:48:47, 3.63it/s] 37%|███▋ | 138620/371472 [11:00:42<17:30:07, 3.70it/s] {'loss': 3.0379, 'learning_rate': 6.644562477105732e-07, 'epoch': 5.97} + 37%|███▋ | 138620/371472 [11:00:42<17:30:07, 3.70it/s] 37%|███▋ | 138621/371472 [11:00:43<18:32:36, 3.49it/s] 37%|███▋ | 138622/371472 [11:00:43<18:20:53, 3.53it/s] 37%|███▋ | 138623/371472 [11:00:43<18:10:36, 3.56it/s] 37%|███▋ | 138624/371472 [11:00:44<17:50:38, 3.62it/s] 37%|███▋ | 138625/371472 [11:00:44<17:29:30, 3.70it/s] 37%|███▋ | 138626/371472 [11:00:44<16:54:45, 3.82it/s] 37%|███▋ | 138627/371472 [11:00:44<16:33:43, 3.91it/s] 37%|███▋ | 138628/371472 [11:00:45<16:09:20, 4.00it/s] 37%|███▋ | 138629/371472 [11:00:45<16:19:31, 3.96it/s] 37%|███▋ | 138630/371472 [11:00:45<16:06:26, 4.02it/s] 37%|███▋ | 138631/371472 [11:00:45<16:50:07, 3.84it/s] 37%|███▋ | 138632/371472 [11:00:46<16:38:19, 3.89it/s] 37%|███▋ | 138633/371472 [11:00:46<17:19:26, 3.73it/s] 37%|███▋ | 138634/371472 [11:00:46<17:25:26, 3.71it/s] 37%|███▋ | 138635/371472 [11:00:46<17:04:12, 3.79it/s] 37%|███▋ | 138636/371472 [11:00:47<17:11:52, 3.76it/s] 37%|███▋ | 138637/371472 [11:00:47<17:22:18, 3.72it/s] 37%|███▋ | 138638/371472 [11:00:47<17:25:35, 3.71it/s] 37%|███▋ | 138639/371472 [11:00:47<16:35:36, 3.90it/s] 37%|███▋ | 138640/371472 [11:00:48<17:06:07, 3.78it/s] {'loss': 3.1696, 'learning_rate': 6.644077657350944e-07, 'epoch': 5.97} + 37%|███▋ | 138640/371472 [11:00:48<17:06:07, 3.78it/s] 37%|███▋ | 138641/371472 [11:00:48<17:22:13, 3.72it/s] 37%|███▋ | 138642/371472 [11:00:48<17:34:53, 3.68it/s] 37%|███▋ | 138643/371472 [11:00:49<17:21:43, 3.73it/s] 37%|███▋ | 138644/371472 [11:00:49<17:15:52, 3.75it/s] 37%|███▋ | 138645/371472 [11:00:49<17:11:50, 3.76it/s] 37%|███▋ | 138646/371472 [11:00:49<18:59:28, 3.41it/s] 37%|███▋ | 138647/371472 [11:00:50<18:25:12, 3.51it/s] 37%|███▋ | 138648/371472 [11:00:50<18:04:52, 3.58it/s] 37%|███▋ | 138649/371472 [11:00:50<18:01:20, 3.59it/s] 37%|███▋ | 138650/371472 [11:00:51<17:32:03, 3.69it/s] 37%|███▋ | 138651/371472 [11:00:51<17:34:12, 3.68it/s] 37%|███▋ | 138652/371472 [11:00:51<17:08:11, 3.77it/s] 37%|███▋ | 138653/371472 [11:00:51<16:30:26, 3.92it/s] 37%|███▋ | 138654/371472 [11:00:52<16:38:32, 3.89it/s] 37%|███▋ | 138655/371472 [11:00:52<16:25:45, 3.94it/s] 37%|███▋ | 138656/371472 [11:00:52<16:25:55, 3.94it/s] 37%|███▋ | 138657/371472 [11:00:52<16:26:32, 3.93it/s] 37%|███▋ | 138658/371472 [11:00:53<16:37:01, 3.89it/s] 37%|███▋ | 138659/371472 [11:00:53<16:59:32, 3.81it/s] 37%|███▋ | 138660/371472 [11:00:53<18:05:22, 3.57it/s] {'loss': 3.1055, 'learning_rate': 6.643592837596156e-07, 'epoch': 5.97} + 37%|███▋ | 138660/371472 [11:00:53<18:05:22, 3.57it/s] 37%|███▋ | 138661/371472 [11:00:53<17:50:16, 3.63it/s] 37%|███▋ | 138662/371472 [11:00:54<17:22:24, 3.72it/s] 37%|███▋ | 138663/371472 [11:00:54<17:44:33, 3.64it/s] 37%|███▋ | 138664/371472 [11:00:54<17:18:41, 3.74it/s] 37%|███▋ | 138665/371472 [11:00:55<17:35:19, 3.68it/s] 37%|███▋ | 138666/371472 [11:00:55<17:46:19, 3.64it/s] 37%|███▋ | 138667/371472 [11:00:55<16:57:55, 3.81it/s] 37%|███▋ | 138668/371472 [11:00:55<17:53:39, 3.61it/s] 37%|███▋ | 138669/371472 [11:00:56<18:49:11, 3.44it/s] 37%|███▋ | 138670/371472 [11:00:56<18:33:56, 3.48it/s] 37%|███▋ | 138671/371472 [11:00:56<17:58:16, 3.60it/s] 37%|███▋ | 138672/371472 [11:00:57<19:03:25, 3.39it/s] 37%|███▋ | 138673/371472 [11:00:57<18:32:41, 3.49it/s] 37%|███▋ | 138674/371472 [11:00:57<17:35:50, 3.67it/s] 37%|███▋ | 138675/371472 [11:00:57<17:29:20, 3.70it/s] 37%|███▋ | 138676/371472 [11:00:58<17:02:13, 3.80it/s] 37%|███▋ | 138677/371472 [11:00:58<16:51:37, 3.84it/s] 37%|███▋ | 138678/371472 [11:00:58<16:45:36, 3.86it/s] 37%|███▋ | 138679/371472 [11:00:58<17:35:10, 3.68it/s] 37%|███▋ | 138680/371472 [11:00:59<18:15:36, 3.54it/s] {'loss': 3.1759, 'learning_rate': 6.643108017841368e-07, 'epoch': 5.97} + 37%|███▋ | 138680/371472 [11:00:59<18:15:36, 3.54it/s] 37%|███▋ | 138681/371472 [11:00:59<17:54:26, 3.61it/s] 37%|███▋ | 138682/371472 [11:00:59<17:42:46, 3.65it/s] 37%|███▋ | 138683/371472 [11:01:00<18:36:08, 3.48it/s] 37%|███▋ | 138684/371472 [11:01:00<17:54:14, 3.61it/s] 37%|███▋ | 138685/371472 [11:01:00<17:45:22, 3.64it/s] 37%|███▋ | 138686/371472 [11:01:00<17:19:38, 3.73it/s] 37%|███▋ | 138687/371472 [11:01:01<17:06:20, 3.78it/s] 37%|███▋ | 138688/371472 [11:01:01<17:11:15, 3.76it/s] 37%|███▋ | 138689/371472 [11:01:01<17:00:57, 3.80it/s] 37%|███▋ | 138690/371472 [11:01:01<18:06:59, 3.57it/s] 37%|███▋ | 138691/371472 [11:01:02<17:52:48, 3.62it/s] 37%|███▋ | 138692/371472 [11:01:02<17:08:45, 3.77it/s] 37%|███▋ | 138693/371472 [11:01:02<16:39:03, 3.88it/s] 37%|███▋ | 138694/371472 [11:01:02<17:07:59, 3.77it/s] 37%|███▋ | 138695/371472 [11:01:03<17:57:15, 3.60it/s] 37%|███▋ | 138696/371472 [11:01:03<19:05:08, 3.39it/s] 37%|███▋ | 138697/371472 [11:01:03<20:04:16, 3.22it/s] 37%|███▋ | 138698/371472 [11:01:04<20:12:19, 3.20it/s] 37%|███▋ | 138699/371472 [11:01:04<19:46:44, 3.27it/s] 37%|███▋ | 138700/371472 [11:01:04<21:48:31, 2.96it/s] {'loss': 3.0362, 'learning_rate': 6.642623198086577e-07, 'epoch': 5.97} + 37%|███▋ | 138700/371472 [11:01:04<21:48:31, 2.96it/s] 37%|███▋ | 138701/371472 [11:01:05<21:11:20, 3.05it/s] 37%|███▋ | 138702/371472 [11:01:05<19:31:53, 3.31it/s] 37%|███▋ | 138703/371472 [11:01:05<18:09:38, 3.56it/s] 37%|███▋ | 138704/371472 [11:01:05<18:03:44, 3.58it/s] 37%|███▋ | 138705/371472 [11:01:06<18:04:35, 3.58it/s] 37%|███▋ | 138706/371472 [11:01:06<18:02:06, 3.59it/s] 37%|███▋ | 138707/371472 [11:01:06<18:12:40, 3.55it/s] 37%|███▋ | 138708/371472 [11:01:07<17:17:18, 3.74it/s] 37%|███▋ | 138709/371472 [11:01:07<18:21:29, 3.52it/s] 37%|███▋ | 138710/371472 [11:01:07<19:42:28, 3.28it/s] 37%|███▋ | 138711/371472 [11:01:07<18:33:58, 3.48it/s] 37%|███▋ | 138712/371472 [11:01:08<18:25:01, 3.51it/s] 37%|███▋ | 138713/371472 [11:01:08<19:18:00, 3.35it/s] 37%|███▋ | 138714/371472 [11:01:08<19:13:02, 3.36it/s] 37%|███▋ | 138715/371472 [11:01:09<18:42:03, 3.46it/s] 37%|███▋ | 138716/371472 [11:01:09<19:04:23, 3.39it/s] 37%|███▋ | 138717/371472 [11:01:09<18:52:44, 3.42it/s] 37%|███▋ | 138718/371472 [11:01:10<18:15:53, 3.54it/s] 37%|███▋ | 138719/371472 [11:01:10<18:55:58, 3.41it/s] 37%|███▋ | 138720/371472 [11:01:10<18:55:35, 3.42it/s] {'loss': 2.9943, 'learning_rate': 6.642138378331788e-07, 'epoch': 5.97} + 37%|███▋ | 138720/371472 [11:01:10<18:55:35, 3.42it/s] 37%|███▋ | 138721/371472 [11:01:10<18:10:58, 3.56it/s] 37%|███▋ | 138722/371472 [11:01:11<17:23:26, 3.72it/s] 37%|███▋ | 138723/371472 [11:01:11<17:02:11, 3.79it/s] 37%|███▋ | 138724/371472 [11:01:11<16:44:44, 3.86it/s] 37%|███▋ | 138725/371472 [11:01:11<16:29:26, 3.92it/s] 37%|███▋ | 138726/371472 [11:01:12<16:42:56, 3.87it/s] 37%|███▋ | 138727/371472 [11:01:12<17:52:08, 3.62it/s] 37%|███▋ | 138728/371472 [11:01:12<18:19:02, 3.53it/s] 37%|███▋ | 138729/371472 [11:01:13<19:16:55, 3.35it/s] 37%|███▋ | 138730/371472 [11:01:13<18:35:43, 3.48it/s] 37%|███▋ | 138731/371472 [11:01:13<18:14:16, 3.54it/s] 37%|███▋ | 138732/371472 [11:01:13<17:58:34, 3.60it/s] 37%|███▋ | 138733/371472 [11:01:14<17:20:49, 3.73it/s] 37%|███▋ | 138734/371472 [11:01:14<17:43:47, 3.65it/s] 37%|███▋ | 138735/371472 [11:01:14<18:08:33, 3.56it/s] 37%|███▋ | 138736/371472 [11:01:14<18:03:27, 3.58it/s] 37%|███▋ | 138737/371472 [11:01:15<19:53:07, 3.25it/s] 37%|███▋ | 138738/371472 [11:01:15<19:01:43, 3.40it/s] 37%|███▋ | 138739/371472 [11:01:15<18:07:53, 3.57it/s] 37%|███▋ | 138740/371472 [11:01:16<17:45:12, 3.64it/s] {'loss': 3.248, 'learning_rate': 6.641653558577e-07, 'epoch': 5.98} + 37%|███▋ | 138740/371472 [11:01:16<17:45:12, 3.64it/s] 37%|███▋ | 138741/371472 [11:01:16<20:14:35, 3.19it/s] 37%|███▋ | 138742/371472 [11:01:16<21:23:28, 3.02it/s] 37%|███▋ | 138743/371472 [11:01:17<21:21:26, 3.03it/s] 37%|███▋ | 138744/371472 [11:01:17<20:21:58, 3.17it/s] 37%|███▋ | 138745/371472 [11:01:17<20:02:21, 3.23it/s] 37%|███▋ | 138746/371472 [11:01:18<19:37:28, 3.29it/s] 37%|███▋ | 138747/371472 [11:01:18<18:49:51, 3.43it/s] 37%|███▋ | 138748/371472 [11:01:18<18:41:21, 3.46it/s] 37%|███▋ | 138749/371472 [11:01:18<18:04:02, 3.58it/s] 37%|███▋ | 138750/371472 [11:01:19<18:07:31, 3.57it/s] 37%|███▋ | 138751/371472 [11:01:19<17:17:31, 3.74it/s] 37%|███▋ | 138752/371472 [11:01:19<17:14:56, 3.75it/s] 37%|███▋ | 138753/371472 [11:01:20<18:20:37, 3.52it/s] 37%|███▋ | 138754/371472 [11:01:20<18:11:34, 3.55it/s] 37%|███▋ | 138755/371472 [11:01:20<18:04:30, 3.58it/s] 37%|███▋ | 138756/371472 [11:01:20<18:12:53, 3.55it/s] 37%|███▋ | 138757/371472 [11:01:21<17:54:42, 3.61it/s] 37%|███▋ | 138758/371472 [11:01:21<18:29:49, 3.49it/s] 37%|███▋ | 138759/371472 [11:01:21<19:45:00, 3.27it/s] 37%|███▋ | 138760/371472 [11:01:22<19:03:00, 3.39it/s] {'loss': 3.0353, 'learning_rate': 6.641168738822212e-07, 'epoch': 5.98} + 37%|███▋ | 138760/371472 [11:01:22<19:03:00, 3.39it/s] 37%|███▋ | 138761/371472 [11:01:22<20:11:14, 3.20it/s] 37%|███▋ | 138762/371472 [11:01:22<19:50:15, 3.26it/s] 37%|███▋ | 138763/371472 [11:01:23<20:07:39, 3.21it/s] 37%|███▋ | 138764/371472 [11:01:23<19:37:01, 3.30it/s] 37%|███▋ | 138765/371472 [11:01:23<18:57:25, 3.41it/s] 37%|███▋ | 138766/371472 [11:01:23<18:43:26, 3.45it/s] 37%|███▋ | 138767/371472 [11:01:24<18:21:55, 3.52it/s] 37%|███▋ | 138768/371472 [11:01:24<18:34:05, 3.48it/s] 37%|███▋ | 138769/371472 [11:01:24<18:52:22, 3.42it/s] 37%|███▋ | 138770/371472 [11:01:24<18:13:50, 3.55it/s] 37%|███▋ | 138771/371472 [11:01:25<17:40:43, 3.66it/s] 37%|███▋ | 138772/371472 [11:01:25<18:30:22, 3.49it/s] 37%|███▋ | 138773/371472 [11:01:25<17:59:08, 3.59it/s] 37%|███▋ | 138774/371472 [11:01:26<17:33:47, 3.68it/s] 37%|███▋ | 138775/371472 [11:01:26<18:24:36, 3.51it/s] 37%|███▋ | 138776/371472 [11:01:26<17:47:02, 3.63it/s] 37%|███▋ | 138777/371472 [11:01:26<17:16:22, 3.74it/s] 37%|███▋ | 138778/371472 [11:01:27<19:29:53, 3.32it/s] 37%|███▋ | 138779/371472 [11:01:27<19:09:58, 3.37it/s] 37%|███▋ | 138780/371472 [11:01:27<19:04:35, 3.39it/s] {'loss': 2.9982, 'learning_rate': 6.640683919067422e-07, 'epoch': 5.98} + 37%|███▋ | 138780/371472 [11:01:27<19:04:35, 3.39it/s] 37%|███▋ | 138781/371472 [11:01:28<19:09:04, 3.38it/s] 37%|███▋ | 138782/371472 [11:01:28<18:09:16, 3.56it/s] 37%|███▋ | 138783/371472 [11:01:28<18:04:58, 3.57it/s] 37%|███▋ | 138784/371472 [11:01:29<19:26:58, 3.32it/s] 37%|███▋ | 138785/371472 [11:01:29<19:06:07, 3.38it/s] 37%|███▋ | 138786/371472 [11:01:29<18:31:07, 3.49it/s] 37%|███�� | 138787/371472 [11:01:29<18:52:46, 3.42it/s] 37%|███▋ | 138788/371472 [11:01:30<18:57:22, 3.41it/s] 37%|███▋ | 138789/371472 [11:01:30<18:52:57, 3.42it/s] 37%|███▋ | 138790/371472 [11:01:30<20:40:43, 3.13it/s] 37%|███▋ | 138791/371472 [11:01:31<20:13:49, 3.19it/s] 37%|███▋ | 138792/371472 [11:01:31<19:43:04, 3.28it/s] 37%|███▋ | 138793/371472 [11:01:31<19:38:09, 3.29it/s] 37%|███▋ | 138794/371472 [11:01:31<18:46:01, 3.44it/s] 37%|███▋ | 138795/371472 [11:01:32<17:56:11, 3.60it/s] 37%|███▋ | 138796/371472 [11:01:32<17:44:56, 3.64it/s] 37%|███▋ | 138797/371472 [11:01:32<18:03:11, 3.58it/s] 37%|███▋ | 138798/371472 [11:01:33<17:32:27, 3.68it/s] 37%|███▋ | 138799/371472 [11:01:33<19:13:36, 3.36it/s] 37%|███▋ | 138800/371472 [11:01:33<18:38:46, 3.47it/s] {'loss': 3.18, 'learning_rate': 6.640199099312633e-07, 'epoch': 5.98} + 37%|███▋ | 138800/371472 [11:01:33<18:38:46, 3.47it/s] 37%|███▋ | 138801/371472 [11:01:33<18:40:23, 3.46it/s] 37%|███▋ | 138802/371472 [11:01:34<19:01:37, 3.40it/s] 37%|███▋ | 138803/371472 [11:01:34<18:39:25, 3.46it/s] 37%|███▋ | 138804/371472 [11:01:34<17:58:21, 3.60it/s] 37%|███▋ | 138805/371472 [11:01:35<17:27:33, 3.70it/s] 37%|███▋ | 138806/371472 [11:01:35<16:53:09, 3.83it/s] 37%|███▋ | 138807/371472 [11:01:35<17:33:30, 3.68it/s] 37%|███▋ | 138808/371472 [11:01:35<17:47:36, 3.63it/s] 37%|███▋ | 138809/371472 [11:01:36<17:44:37, 3.64it/s] 37%|███▋ | 138810/371472 [11:01:36<18:53:27, 3.42it/s] 37%|███▋ | 138811/371472 [11:01:36<18:16:11, 3.54it/s] 37%|███▋ | 138812/371472 [11:01:37<17:57:08, 3.60it/s] 37%|███▋ | 138813/371472 [11:01:37<18:08:48, 3.56it/s] 37%|███▋ | 138814/371472 [11:01:37<17:51:21, 3.62it/s] 37%|███▋ | 138815/371472 [11:01:37<17:06:46, 3.78it/s] 37%|███▋ | 138816/371472 [11:01:38<16:31:37, 3.91it/s] 37%|███▋ | 138817/371472 [11:01:38<18:40:02, 3.46it/s] 37%|███▋ | 138818/371472 [11:01:38<17:41:43, 3.65it/s] 37%|███▋ | 138819/371472 [11:01:38<17:35:35, 3.67it/s] 37%|███▋ | 138820/371472 [11:01:39<17:49:18, 3.63it/s] {'loss': 3.1225, 'learning_rate': 6.639714279557844e-07, 'epoch': 5.98} + 37%|███▋ | 138820/371472 [11:01:39<17:49:18, 3.63it/s] 37%|███▋ | 138821/371472 [11:01:39<17:47:43, 3.63it/s] 37%|███▋ | 138822/371472 [11:01:39<17:07:27, 3.77it/s] 37%|███▋ | 138823/371472 [11:01:39<16:57:10, 3.81it/s] 37%|███▋ | 138824/371472 [11:01:40<17:10:15, 3.76it/s] 37%|███▋ | 138825/371472 [11:01:40<17:00:02, 3.80it/s] 37%|███▋ | 138826/371472 [11:01:40<17:18:01, 3.74it/s] 37%|███▋ | 138827/371472 [11:01:41<17:29:11, 3.70it/s] 37%|███▋ | 138828/371472 [11:01:41<17:51:20, 3.62it/s] 37%|███▋ | 138829/371472 [11:01:41<17:48:08, 3.63it/s] 37%|███▋ | 138830/371472 [11:01:41<18:59:37, 3.40it/s] 37%|███▋ | 138831/371472 [11:01:42<18:17:08, 3.53it/s] 37%|███▋ | 138832/371472 [11:01:42<17:47:06, 3.63it/s] 37%|███▋ | 138833/371472 [11:01:42<18:47:40, 3.44it/s] 37%|███▋ | 138834/371472 [11:01:43<17:55:31, 3.61it/s] 37%|███▋ | 138835/371472 [11:01:43<17:18:05, 3.74it/s] 37%|███▋ | 138836/371472 [11:01:43<17:25:32, 3.71it/s] 37%|███▋ | 138837/371472 [11:01:43<17:57:55, 3.60it/s] 37%|███▋ | 138838/371472 [11:01:44<18:41:15, 3.46it/s] 37%|███▋ | 138839/371472 [11:01:44<21:01:20, 3.07it/s] 37%|███▋ | 138840/371472 [11:01:44<20:33:05, 3.14it/s] {'loss': 3.0631, 'learning_rate': 6.639229459803056e-07, 'epoch': 5.98} + 37%|███▋ | 138840/371472 [11:01:44<20:33:05, 3.14it/s] 37%|███▋ | 138841/371472 [11:01:45<21:13:22, 3.04it/s] 37%|███▋ | 138842/371472 [11:01:45<20:02:42, 3.22it/s] 37%|███▋ | 138843/371472 [11:01:45<19:36:28, 3.30it/s] 37%|███▋ | 138844/371472 [11:01:46<20:30:20, 3.15it/s] 37%|███▋ | 138845/371472 [11:01:46<19:59:36, 3.23it/s] 37%|███▋ | 138846/371472 [11:01:46<19:06:13, 3.38it/s] 37%|███▋ | 138847/371472 [11:01:47<19:44:46, 3.27it/s] 37%|███▋ | 138848/371472 [11:01:47<18:55:31, 3.41it/s] 37%|███▋ | 138849/371472 [11:01:47<18:11:00, 3.55it/s] 37%|███▋ | 138850/371472 [11:01:47<17:55:54, 3.60it/s] 37%|███▋ | 138851/371472 [11:01:48<17:08:23, 3.77it/s] 37%|███▋ | 138852/371472 [11:01:48<17:16:47, 3.74it/s] 37%|███▋ | 138853/371472 [11:01:48<19:01:21, 3.40it/s] 37%|███▋ | 138854/371472 [11:01:48<18:12:10, 3.55it/s] 37%|███▋ | 138855/371472 [11:01:49<18:16:59, 3.53it/s] 37%|███▋ | 138856/371472 [11:01:49<18:16:50, 3.53it/s] 37%|███▋ | 138857/371472 [11:01:49<18:03:11, 3.58it/s] 37%|███▋ | 138858/371472 [11:01:50<17:27:40, 3.70it/s] 37%|███▋ | 138859/371472 [11:01:50<17:53:36, 3.61it/s] 37%|███▋ | 138860/371472 [11:01:50<19:19:49, 3.34it/s] {'loss': 3.1189, 'learning_rate': 6.638744640048266e-07, 'epoch': 5.98} + 37%|███▋ | 138860/371472 [11:01:50<19:19:49, 3.34it/s] 37%|███▋ | 138861/371472 [11:01:50<19:00:30, 3.40it/s] 37%|███▋ | 138862/371472 [11:01:51<18:14:48, 3.54it/s] 37%|███▋ | 138863/371472 [11:01:51<17:29:55, 3.69it/s] 37%|███▋ | 138864/371472 [11:01:51<16:56:36, 3.81it/s] 37%|███▋ | 138865/371472 [11:01:52<18:35:54, 3.47it/s] 37%|███▋ | 138866/371472 [11:01:52<18:04:49, 3.57it/s] 37%|███▋ | 138867/371472 [11:01:52<17:29:46, 3.69it/s] 37%|███▋ | 138868/371472 [11:01:52<19:02:50, 3.39it/s] 37%|███▋ | 138869/371472 [11:01:53<19:32:48, 3.31it/s] 37%|███▋ | 138870/371472 [11:01:53<19:01:10, 3.40it/s] 37%|███▋ | 138871/371472 [11:01:53<18:07:40, 3.56it/s] 37%|███▋ | 138872/371472 [11:01:53<17:47:22, 3.63it/s] 37%|███▋ | 138873/371472 [11:01:54<18:04:46, 3.57it/s] 37%|███▋ | 138874/371472 [11:01:54<18:15:44, 3.54it/s] 37%|███▋ | 138875/371472 [11:01:54<18:06:53, 3.57it/s] 37%|███▋ | 138876/371472 [11:01:55<17:48:34, 3.63it/s] 37%|███▋ | 138877/371472 [11:01:55<20:48:30, 3.10it/s] 37%|███▋ | 138878/371472 [11:01:55<20:46:36, 3.11it/s] 37%|███▋ | 138879/371472 [11:01:56<19:19:14, 3.34it/s] 37%|███▋ | 138880/371472 [11:01:56<19:03:54, 3.39it/s] {'loss': 3.0192, 'learning_rate': 6.638259820293477e-07, 'epoch': 5.98} + 37%|███▋ | 138880/371472 [11:01:56<19:03:54, 3.39it/s] 37%|███▋ | 138881/371472 [11:01:56<18:13:21, 3.55it/s] 37%|███▋ | 138882/371472 [11:01:56<18:10:02, 3.56it/s] 37%|███▋ | 138883/371472 [11:01:57<17:55:23, 3.60it/s] 37%|███▋ | 138884/371472 [11:01:57<17:54:09, 3.61it/s] 37%|███▋ | 138885/371472 [11:01:57<17:25:53, 3.71it/s] 37%|███▋ | 138886/371472 [11:01:57<17:18:45, 3.73it/s] 37%|███▋ | 138887/371472 [11:01:58<17:15:28, 3.74it/s] 37%|███▋ | 138888/371472 [11:01:58<18:01:01, 3.59it/s] 37%|███▋ | 138889/371472 [11:01:58<17:25:54, 3.71it/s] 37%|███▋ | 138890/371472 [11:01:59<18:56:36, 3.41it/s] 37%|███▋ | 138891/371472 [11:01:59<18:09:09, 3.56it/s] 37%|███▋ | 138892/371472 [11:01:59<18:03:47, 3.58it/s] 37%|███▋ | 138893/371472 [11:01:59<18:15:37, 3.54it/s] 37%|███▋ | 138894/371472 [11:02:00<17:39:05, 3.66it/s] 37%|███▋ | 138895/371472 [11:02:00<17:44:37, 3.64it/s] 37%|███▋ | 138896/371472 [11:02:00<18:52:27, 3.42it/s] 37%|███▋ | 138897/371472 [11:02:01<18:52:58, 3.42it/s] 37%|███▋ | 138898/371472 [11:02:01<18:43:19, 3.45it/s] 37%|███▋ | 138899/371472 [11:02:01<19:11:31, 3.37it/s] 37%|███▋ | 138900/371472 [11:02:02<18:54:49, 3.42it/s] {'loss': 3.21, 'learning_rate': 6.637775000538689e-07, 'epoch': 5.98} + 37%|███▋ | 138900/371472 [11:02:02<18:54:49, 3.42it/s] 37%|███▋ | 138901/371472 [11:02:02<18:28:15, 3.50it/s] 37%|███▋ | 138902/371472 [11:02:02<18:58:06, 3.41it/s] 37%|███▋ | 138903/371472 [11:02:02<18:27:12, 3.50it/s] 37%|███▋ | 138904/371472 [11:02:03<18:51:32, 3.43it/s] 37%|███▋ | 138905/371472 [11:02:03<17:41:30, 3.65it/s] 37%|███▋ | 138906/371472 [11:02:03<18:02:25, 3.58it/s] 37%|███▋ | 138907/371472 [11:02:03<17:42:54, 3.65it/s] 37%|███▋ | 138908/371472 [11:02:04<18:49:50, 3.43it/s] 37%|███▋ | 138909/371472 [11:02:04<21:14:47, 3.04it/s] 37%|███▋ | 138910/371472 [11:02:04<20:13:11, 3.19it/s] 37%|███▋ | 138911/371472 [11:02:05<19:42:33, 3.28it/s] 37%|███▋ | 138912/371472 [11:02:05<18:57:53, 3.41it/s] 37%|███▋ | 138913/371472 [11:02:05<19:01:19, 3.40it/s] 37%|███▋ | 138914/371472 [11:02:06<19:15:12, 3.36it/s] 37%|███▋ | 138915/371472 [11:02:06<18:27:36, 3.50it/s] 37%|███▋ | 138916/371472 [11:02:06<18:00:22, 3.59it/s] 37%|███▋ | 138917/371472 [11:02:06<17:38:24, 3.66it/s] 37%|███▋ | 138918/371472 [11:02:07<17:37:11, 3.67it/s] 37%|███▋ | 138919/371472 [11:02:07<17:32:29, 3.68it/s] 37%|███▋ | 138920/371472 [11:02:07<19:22:46, 3.33it/s] {'loss': 3.3042, 'learning_rate': 6.6372901807839e-07, 'epoch': 5.98} + 37%|███▋ | 138920/371472 [11:02:07<19:22:46, 3.33it/s] 37%|███▋ | 138921/371472 [11:02:08<18:24:40, 3.51it/s] 37%|███▋ | 138922/371472 [11:02:08<17:42:11, 3.65it/s] 37%|███▋ | 138923/371472 [11:02:08<17:02:25, 3.79it/s] 37%|███▋ | 138924/371472 [11:02:08<17:57:26, 3.60it/s] 37%|███▋ | 138925/371472 [11:02:09<17:55:40, 3.60it/s] 37%|███▋ | 138926/371472 [11:02:09<17:23:29, 3.71it/s] 37%|███▋ | 138927/371472 [11:02:09<17:49:58, 3.62it/s] 37%|███▋ | 138928/371472 [11:02:10<18:42:17, 3.45it/s] 37%|███▋ | 138929/371472 [11:02:10<17:47:12, 3.63it/s] 37%|███▋ | 138930/371472 [11:02:10<19:50:16, 3.26it/s] 37%|███▋ | 138931/371472 [11:02:10<19:43:28, 3.27it/s] 37%|███▋ | 138932/371472 [11:02:11<21:01:00, 3.07it/s] 37%|███▋ | 138933/371472 [11:02:11<19:30:00, 3.31it/s] 37%|███▋ | 138934/371472 [11:02:11<18:36:26, 3.47it/s] 37%|███▋ | 138935/371472 [11:02:12<18:43:56, 3.45it/s] 37%|███▋ | 138936/371472 [11:02:12<18:22:33, 3.52it/s] 37%|███▋ | 138937/371472 [11:02:12<18:12:01, 3.55it/s] 37%|███▋ | 138938/371472 [11:02:12<17:33:30, 3.68it/s] 37%|███▋ | 138939/371472 [11:02:13<17:53:53, 3.61it/s] 37%|███▋ | 138940/371472 [11:02:13<17:17:09, 3.74it/s] {'loss': 3.2292, 'learning_rate': 6.63680536102911e-07, 'epoch': 5.98} + 37%|███▋ | 138940/371472 [11:02:13<17:17:09, 3.74it/s] 37%|███▋ | 138941/371472 [11:02:13<19:42:53, 3.28it/s] 37%|███▋ | 138942/371472 [11:02:14<19:56:18, 3.24it/s] 37%|███▋ | 138943/371472 [11:02:14<19:55:11, 3.24it/s] 37%|███▋ | 138944/371472 [11:02:14<19:28:31, 3.32it/s] 37%|███▋ | 138945/371472 [11:02:15<19:17:20, 3.35it/s] 37%|███▋ | 138946/371472 [11:02:15<20:26:39, 3.16it/s] 37%|███▋ | 138947/371472 [11:02:15<19:15:54, 3.35it/s] 37%|███▋ | 138948/371472 [11:02:15<19:34:25, 3.30it/s] 37%|███▋ | 138949/371472 [11:02:16<19:20:57, 3.34it/s] 37%|███▋ | 138950/371472 [11:02:16<19:16:40, 3.35it/s] 37%|███▋ | 138951/371472 [11:02:16<18:42:23, 3.45it/s] 37%|███▋ | 138952/371472 [11:02:17<17:45:52, 3.64it/s] 37%|███▋ | 138953/371472 [11:02:17<16:50:55, 3.83it/s] 37%|███▋ | 138954/371472 [11:02:17<16:49:20, 3.84it/s] 37%|███▋ | 138955/371472 [11:02:17<17:11:50, 3.76it/s] 37%|███▋ | 138956/371472 [11:02:18<17:35:59, 3.67it/s] 37%|███▋ | 138957/371472 [11:02:18<18:05:25, 3.57it/s] 37%|███▋ | 138958/371472 [11:02:18<18:13:07, 3.55it/s] 37%|███▋ | 138959/371472 [11:02:18<17:53:04, 3.61it/s] 37%|███▋ | 138960/371472 [11:02:19<18:26:36, 3.50it/s] {'loss': 3.1658, 'learning_rate': 6.636320541274321e-07, 'epoch': 5.99} + 37%|███▋ | 138960/371472 [11:02:19<18:26:36, 3.50it/s] 37%|███▋ | 138961/371472 [11:02:19<18:40:18, 3.46it/s] 37%|███▋ | 138962/371472 [11:02:19<18:28:25, 3.50it/s] 37%|███▋ | 138963/371472 [11:02:20<17:22:11, 3.72it/s] 37%|███▋ | 138964/371472 [11:02:20<17:45:50, 3.64it/s] 37%|███▋ | 138965/371472 [11:02:20<17:57:05, 3.60it/s] 37%|███▋ | 138966/371472 [11:02:20<17:53:04, 3.61it/s] 37%|███▋ | 138967/371472 [11:02:21<17:41:28, 3.65it/s] 37%|███▋ | 138968/371472 [11:02:21<17:47:57, 3.63it/s] 37%|███▋ | 138969/371472 [11:02:21<18:22:37, 3.51it/s] 37%|███▋ | 138970/371472 [11:02:22<18:44:00, 3.45it/s] 37%|███▋ | 138971/371472 [11:02:22<17:29:30, 3.69it/s] 37%|███▋ | 138972/371472 [11:02:22<17:12:52, 3.75it/s] 37%|███▋ | 138973/371472 [11:02:22<16:43:05, 3.86it/s] 37%|███▋ | 138974/371472 [11:02:23<17:06:12, 3.78it/s] 37%|███▋ | 138975/371472 [11:02:23<16:34:42, 3.90it/s] 37%|███▋ | 138976/371472 [11:02:23<16:53:36, 3.82it/s] 37%|███▋ | 138977/371472 [11:02:23<17:30:04, 3.69it/s] 37%|███▋ | 138978/371472 [11:02:24<17:25:10, 3.71it/s] 37%|███▋ | 138979/371472 [11:02:24<18:35:52, 3.47it/s] 37%|███▋ | 138980/371472 [11:02:24<18:06:35, 3.57it/s] {'loss': 3.2374, 'learning_rate': 6.635835721519533e-07, 'epoch': 5.99} + 37%|███▋ | 138980/371472 [11:02:24<18:06:35, 3.57it/s] 37%|███▋ | 138981/371472 [11:02:25<18:45:06, 3.44it/s] 37%|███▋ | 138982/371472 [11:02:25<19:00:21, 3.40it/s] 37%|███▋ | 138983/371472 [11:02:25<19:04:19, 3.39it/s] 37%|███▋ | 138984/371472 [11:02:25<18:59:25, 3.40it/s] 37%|███▋ | 138985/371472 [11:02:26<18:21:59, 3.52it/s] 37%|███▋ | 138986/371472 [11:02:26<18:16:43, 3.53it/s] 37%|███▋ | 138987/371472 [11:02:26<17:32:29, 3.68it/s] 37%|███▋ | 138988/371472 [11:02:26<17:18:45, 3.73it/s] 37%|███▋ | 138989/371472 [11:02:27<16:41:13, 3.87it/s] 37%|███▋ | 138990/371472 [11:02:27<16:31:15, 3.91it/s] 37%|███▋ | 138991/371472 [11:02:27<16:56:25, 3.81it/s] 37%|███▋ | 138992/371472 [11:02:27<16:24:40, 3.93it/s] 37%|███▋ | 138993/371472 [11:02:28<17:36:42, 3.67it/s] 37%|███▋ | 138994/371472 [11:02:28<16:57:27, 3.81it/s] 37%|███▋ | 138995/371472 [11:02:28<16:56:15, 3.81it/s] 37%|███▋ | 138996/371472 [11:02:29<17:19:47, 3.73it/s] 37%|███▋ | 138997/371472 [11:02:29<16:37:42, 3.88it/s] 37%|███▋ | 138998/371472 [11:02:29<16:57:40, 3.81it/s] 37%|███▋ | 138999/371472 [11:02:29<16:45:43, 3.85it/s] 37%|███▋ | 139000/371472 [11:02:30<17:43:54, 3.64it/s] {'loss': 3.0771, 'learning_rate': 6.635350901764743e-07, 'epoch': 5.99} + 37%|███▋ | 139000/371472 [11:02:30<17:43:54, 3.64it/s] 37%|███▋ | 139001/371472 [11:02:30<18:22:33, 3.51it/s] 37%|███▋ | 139002/371472 [11:02:30<18:07:47, 3.56it/s] 37%|███▋ | 139003/371472 [11:02:30<17:13:51, 3.75it/s] 37%|███▋ | 139004/371472 [11:02:31<16:58:55, 3.80it/s] 37%|███▋ | 139005/371472 [11:02:31<17:00:42, 3.80it/s] 37%|███▋ | 139006/371472 [11:02:31<16:58:03, 3.81it/s] 37%|███▋ | 139007/371472 [11:02:32<19:22:46, 3.33it/s] 37%|███▋ | 139008/371472 [11:02:32<19:58:56, 3.23it/s] 37%|███▋ | 139009/371472 [11:02:32<19:10:02, 3.37it/s] 37%|███▋ | 139010/371472 [11:02:32<18:27:54, 3.50it/s] 37%|███▋ | 139011/371472 [11:02:33<18:56:45, 3.41it/s] 37%|███▋ | 139012/371472 [11:02:33<18:35:50, 3.47it/s] 37%|███▋ | 139013/371472 [11:02:33<18:54:57, 3.41it/s] 37%|███▋ | 139014/371472 [11:02:34<18:47:35, 3.44it/s] 37%|███▋ | 139015/371472 [11:02:34<18:29:21, 3.49it/s] 37%|███▋ | 139016/371472 [11:02:34<17:51:44, 3.61it/s] 37%|███▋ | 139017/371472 [11:02:35<19:02:26, 3.39it/s] 37%|███▋ | 139018/371472 [11:02:35<17:58:01, 3.59it/s] 37%|███▋ | 139019/371472 [11:02:35<18:52:28, 3.42it/s] 37%|███▋ | 139020/371472 [11:02:35<18:04:30, 3.57it/s] {'loss': 3.0023, 'learning_rate': 6.634866082009954e-07, 'epoch': 5.99} + 37%|███▋ | 139020/371472 [11:02:35<18:04:30, 3.57it/s] 37%|███▋ | 139021/371472 [11:02:36<18:08:46, 3.56it/s] 37%|███▋ | 139022/371472 [11:02:36<19:14:19, 3.36it/s] 37%|███▋ | 139023/371472 [11:02:36<18:46:47, 3.44it/s] 37%|███▋ | 139024/371472 [11:02:37<18:13:40, 3.54it/s] 37%|███▋ | 139025/371472 [11:02:37<17:42:51, 3.64it/s] 37%|███▋ | 139026/371472 [11:02:37<17:40:46, 3.65it/s] 37%|███▋ | 139027/371472 [11:02:37<17:40:14, 3.65it/s] 37%|███▋ | 139028/371472 [11:02:38<18:00:40, 3.58it/s] 37%|███▋ | 139029/371472 [11:02:38<18:18:43, 3.53it/s] 37%|███▋ | 139030/371472 [11:02:38<19:51:33, 3.25it/s] 37%|███▋ | 139031/371472 [11:02:38<18:22:09, 3.51it/s] 37%|███▋ | 139032/371472 [11:02:39<18:22:52, 3.51it/s] 37%|███▋ | 139033/371472 [11:02:39<20:27:11, 3.16it/s] 37%|███▋ | 139034/371472 [11:02:40<22:01:41, 2.93it/s] 37%|███▋ | 139035/371472 [11:02:40<20:50:47, 3.10it/s] 37%|███▋ | 139036/371472 [11:02:40<19:42:50, 3.28it/s] 37%|███▋ | 139037/371472 [11:02:40<20:55:55, 3.08it/s] 37%|███▋ | 139038/371472 [11:02:41<19:32:51, 3.30it/s] 37%|███▋ | 139039/371472 [11:02:41<18:25:44, 3.50it/s] 37%|███▋ | 139040/371472 [11:02:41<20:50:04, 3.10it/s] {'loss': 3.1219, 'learning_rate': 6.634381262255166e-07, 'epoch': 5.99} + 37%|███▋ | 139040/371472 [11:02:41<20:50:04, 3.10it/s] 37%|███▋ | 139041/371472 [11:02:42<20:56:55, 3.08it/s] 37%|███▋ | 139042/371472 [11:02:42<19:18:10, 3.34it/s] 37%|███▋ | 139043/371472 [11:02:42<18:09:19, 3.56it/s] 37%|███▋ | 139044/371472 [11:02:42<17:55:49, 3.60it/s] 37%|███▋ | 139045/371472 [11:02:43<17:40:48, 3.65it/s] 37%|███▋ | 139046/371472 [11:02:43<17:45:22, 3.64it/s] 37%|███▋ | 139047/371472 [11:02:43<17:40:53, 3.65it/s] 37%|███▋ | 139048/371472 [11:02:44<17:21:47, 3.72it/s] 37%|███▋ | 139049/371472 [11:02:44<17:06:03, 3.78it/s] 37%|███▋ | 139050/371472 [11:02:44<17:17:29, 3.73it/s] 37%|███▋ | 139051/371472 [11:02:44<17:47:41, 3.63it/s] 37%|███▋ | 139052/371472 [11:02:45<17:34:30, 3.67it/s] 37%|███▋ | 139053/371472 [11:02:45<17:48:58, 3.62it/s] 37%|███▋ | 139054/371472 [11:02:45<17:38:47, 3.66it/s] 37%|███▋ | 139055/371472 [11:02:46<19:13:09, 3.36it/s] 37%|███▋ | 139056/371472 [11:02:46<18:22:15, 3.51it/s] 37%|███▋ | 139057/371472 [11:02:46<18:09:19, 3.56it/s] 37%|███▋ | 139058/371472 [11:02:46<17:42:59, 3.64it/s] 37%|███▋ | 139059/371472 [11:02:47<17:24:58, 3.71it/s] 37%|███▋ | 139060/371472 [11:02:47<17:43:53, 3.64it/s] {'loss': 3.0883, 'learning_rate': 6.633896442500378e-07, 'epoch': 5.99} + 37%|███▋ | 139060/371472 [11:02:47<17:43:53, 3.64it/s] 37%|███▋ | 139061/371472 [11:02:47<18:11:47, 3.55it/s] 37%|███▋ | 139062/371472 [11:02:47<17:41:45, 3.65it/s] 37%|███▋ | 139063/371472 [11:02:48<17:51:12, 3.62it/s] 37%|███▋ | 139064/371472 [11:02:48<18:11:55, 3.55it/s] 37%|███▋ | 139065/371472 [11:02:48<17:48:12, 3.63it/s] 37%|███▋ | 139066/371472 [11:02:49<18:39:11, 3.46it/s] 37%|███▋ | 139067/371472 [11:02:49<17:50:10, 3.62it/s] 37%|███▋ | 139068/371472 [11:02:49<17:55:53, 3.60it/s] 37%|███▋ | 139069/371472 [11:02:49<18:02:30, 3.58it/s] 37%|███▋ | 139070/371472 [11:02:50<17:40:08, 3.65it/s] 37%|███▋ | 139071/371472 [11:02:50<17:02:36, 3.79it/s] 37%|███▋ | 139072/371472 [11:02:50<17:00:08, 3.80it/s] 37%|███▋ | 139073/371472 [11:02:50<17:45:45, 3.63it/s] 37%|███▋ | 139074/371472 [11:02:51<19:44:54, 3.27it/s] 37%|███▋ | 139075/371472 [11:02:51<19:51:19, 3.25it/s] 37%|███▋ | 139076/371472 [11:02:51<18:41:48, 3.45it/s] 37%|███▋ | 139077/371472 [11:02:52<17:41:41, 3.65it/s] 37%|███▋ | 139078/371472 [11:02:52<17:33:55, 3.68it/s] 37%|███▋ | 139079/371472 [11:02:52<18:13:41, 3.54it/s] 37%|███▋ | 139080/371472 [11:02:53<18:36:00, 3.47it/s] {'loss': 3.1024, 'learning_rate': 6.633411622745587e-07, 'epoch': 5.99} + 37%|███▋ | 139080/371472 [11:02:53<18:36:00, 3.47it/s] 37%|███▋ | 139081/371472 [11:02:53<20:31:15, 3.15it/s] 37%|███▋ | 139082/371472 [11:02:53<19:06:32, 3.38it/s] 37%|███▋ | 139083/371472 [11:02:53<19:35:56, 3.29it/s] 37%|███▋ | 139084/371472 [11:02:54<19:41:49, 3.28it/s] 37%|███▋ | 139085/371472 [11:02:54<21:11:39, 3.05it/s] 37%|███▋ | 139086/371472 [11:02:54<19:30:32, 3.31it/s] 37%|███▋ | 139087/371472 [11:02:55<18:32:17, 3.48it/s] 37%|███▋ | 139088/371472 [11:02:55<18:21:01, 3.52it/s] 37%|███▋ | 139089/371472 [11:02:55<17:48:20, 3.63it/s] 37%|███▋ | 139090/371472 [11:02:55<18:00:33, 3.58it/s] 37%|███▋ | 139091/371472 [11:02:56<18:04:57, 3.57it/s] 37%|███▋ | 139092/371472 [11:02:56<17:27:48, 3.70it/s] 37%|███▋ | 139093/371472 [11:02:56<18:24:41, 3.51it/s] 37%|███▋ | 139094/371472 [11:02:57<18:07:26, 3.56it/s] 37%|███▋ | 139095/371472 [11:02:57<18:02:20, 3.58it/s] 37%|███▋ | 139096/371472 [11:02:57<19:12:07, 3.36it/s] 37%|███▋ | 139097/371472 [11:02:57<18:40:51, 3.46it/s] 37%|███▋ | 139098/371472 [11:02:58<18:03:21, 3.57it/s] 37%|███▋ | 139099/371472 [11:02:58<19:04:38, 3.38it/s] 37%|███▋ | 139100/371472 [11:02:58<18:23:03, 3.51it/s] {'loss': 3.1098, 'learning_rate': 6.632926802990798e-07, 'epoch': 5.99} + 37%|███▋ | 139100/371472 [11:02:58<18:23:03, 3.51it/s] 37%|███▋ | 139101/371472 [11:02:59<17:59:19, 3.59it/s] 37%|███▋ | 139102/371472 [11:02:59<17:49:38, 3.62it/s] 37%|███▋ | 139103/371472 [11:02:59<17:34:18, 3.67it/s] 37%|███▋ | 139104/371472 [11:02:59<17:00:28, 3.80it/s] 37%|███▋ | 139105/371472 [11:03:00<16:38:51, 3.88it/s] 37%|███▋ | 139106/371472 [11:03:00<17:08:30, 3.77it/s] 37%|███▋ | 139107/371472 [11:03:00<18:14:54, 3.54it/s] 37%|███▋ | 139108/371472 [11:03:01<19:48:27, 3.26it/s] 37%|███▋ | 139109/371472 [11:03:01<19:37:41, 3.29it/s] 37%|███▋ | 139110/371472 [11:03:01<18:33:02, 3.48it/s] 37%|███▋ | 139111/371472 [11:03:01<17:33:34, 3.68it/s] 37%|███▋ | 139112/371472 [11:03:02<17:19:04, 3.73it/s] 37%|███▋ | 139113/371472 [11:03:02<18:03:18, 3.57it/s] 37%|███▋ | 139114/371472 [11:03:02<17:47:35, 3.63it/s] 37%|███▋ | 139115/371472 [11:03:02<17:30:13, 3.69it/s] 37%|███▋ | 139116/371472 [11:03:03<17:30:40, 3.69it/s] 37%|███▋ | 139117/371472 [11:03:03<17:51:57, 3.61it/s] 37%|███▋ | 139118/371472 [11:03:03<17:29:12, 3.69it/s] 37%|███▋ | 139119/371472 [11:03:04<17:09:21, 3.76it/s] 37%|███▋ | 139120/371472 [11:03:04<17:30:39, 3.69it/s] {'loss': 3.1583, 'learning_rate': 6.63244198323601e-07, 'epoch': 5.99} + 37%|███▋ | 139120/371472 [11:03:04<17:30:39, 3.69it/s] 37%|███▋ | 139121/371472 [11:03:04<17:36:37, 3.66it/s] 37%|███▋ | 139122/371472 [11:03:04<17:23:45, 3.71it/s] 37%|███▋ | 139123/371472 [11:03:05<16:33:36, 3.90it/s] 37%|███▋ | 139124/371472 [11:03:05<18:43:50, 3.45it/s] 37%|███▋ | 139125/371472 [11:03:05<18:37:41, 3.46it/s] 37%|███▋ | 139126/371472 [11:03:06<18:55:54, 3.41it/s] 37%|███▋ | 139127/371472 [11:03:06<18:27:34, 3.50it/s] 37%|███▋ | 139128/371472 [11:03:06<17:48:07, 3.63it/s] 37%|███▋ | 139129/371472 [11:03:06<19:31:55, 3.30it/s] 37%|███▋ | 139130/371472 [11:03:07<18:32:01, 3.48it/s] 37%|███▋ | 139131/371472 [11:03:07<17:55:49, 3.60it/s] 37%|███▋ | 139132/371472 [11:03:07<17:48:02, 3.63it/s] 37%|███▋ | 139133/371472 [11:03:07<18:23:21, 3.51it/s] 37%|███▋ | 139134/371472 [11:03:08<18:34:15, 3.48it/s] 37%|███▋ | 139135/371472 [11:03:08<20:36:00, 3.13it/s] 37%|███▋ | 139136/371472 [11:03:08<20:00:50, 3.22it/s] 37%|███▋ | 139137/371472 [11:03:09<18:33:12, 3.48it/s] 37%|███▋ | 139138/371472 [11:03:09<18:00:40, 3.58it/s] 37%|███▋ | 139139/371472 [11:03:09<20:26:23, 3.16it/s] 37%|███▋ | 139140/371472 [11:03:10<19:21:45, 3.33it/s] {'loss': 3.2578, 'learning_rate': 6.63195716348122e-07, 'epoch': 5.99} + 37%|███▋ | 139140/371472 [11:03:10<19:21:45, 3.33it/s] 37%|███▋ | 139141/371472 [11:03:10<18:30:37, 3.49it/s] 37%|███▋ | 139142/371472 [11:03:10<17:59:24, 3.59it/s] 37%|███▋ | 139143/371472 [11:03:10<17:08:19, 3.77it/s] 37%|███▋ | 139144/371472 [11:03:11<19:26:14, 3.32it/s] 37%|███▋ | 139145/371472 [11:03:11<18:24:37, 3.51it/s] 37%|███▋ | 139146/371472 [11:03:11<17:54:10, 3.60it/s] 37%|███▋ | 139147/371472 [11:03:12<17:13:54, 3.75it/s] 37%|███▋ | 139148/371472 [11:03:12<18:19:16, 3.52it/s] 37%|███▋ | 139149/371472 [11:03:12<18:06:32, 3.56it/s] 37%|███▋ | 139150/371472 [11:03:12<17:34:36, 3.67it/s] 37%|███▋ | 139151/371472 [11:03:13<17:11:40, 3.75it/s] 37%|███▋ | 139152/371472 [11:03:13<16:26:07, 3.93it/s] 37%|███▋ | 139153/371472 [11:03:13<16:26:49, 3.92it/s] 37%|███▋ | 139154/371472 [11:03:13<17:31:00, 3.68it/s] 37%|███▋ | 139155/371472 [11:03:14<18:00:39, 3.58it/s] 37%|███▋ | 139156/371472 [11:03:14<17:33:41, 3.67it/s] 37%|███▋ | 139157/371472 [11:03:14<17:39:34, 3.65it/s] 37%|███▋ | 139158/371472 [11:03:15<18:09:57, 3.55it/s] 37%|███▋ | 139159/371472 [11:03:15<17:15:43, 3.74it/s] 37%|███▋ | 139160/371472 [11:03:15<17:20:30, 3.72it/s] {'loss': 2.9869, 'learning_rate': 6.631472343726432e-07, 'epoch': 5.99} + 37%|███▋ | 139160/371472 [11:03:15<17:20:30, 3.72it/s] 37%|███▋ | 139161/371472 [11:03:15<17:45:26, 3.63it/s] 37%|███▋ | 139162/371472 [11:03:16<17:23:53, 3.71it/s] 37%|███▋ | 139163/371472 [11:03:16<17:31:03, 3.68it/s] 37%|███▋ | 139164/371472 [11:03:16<17:20:54, 3.72it/s] 37%|███▋ | 139165/371472 [11:03:16<16:51:56, 3.83it/s] 37%|███▋ | 139166/371472 [11:03:17<16:30:29, 3.91it/s] 37%|███▋ | 139167/371472 [11:03:17<16:52:02, 3.83it/s] 37%|███▋ | 139168/371472 [11:03:17<18:02:12, 3.58it/s] 37%|███▋ | 139169/371472 [11:03:17<17:33:44, 3.67it/s] 37%|███▋ | 139170/371472 [11:03:18<17:39:39, 3.65it/s] 37%|███▋ | 139171/371472 [11:03:18<17:22:05, 3.72it/s] 37%|███▋ | 139172/371472 [11:03:18<17:38:59, 3.66it/s] 37%|███▋ | 139173/371472 [11:03:19<17:05:43, 3.77it/s] 37%|███▋ | 139174/371472 [11:03:19<16:30:41, 3.91it/s] 37%|███▋ | 139175/371472 [11:03:19<16:31:47, 3.90it/s] 37%|███▋ | 139176/371472 [11:03:19<18:22:39, 3.51it/s] 37%|███▋ | 139177/371472 [11:03:20<19:36:04, 3.29it/s] 37%|███▋ | 139178/371472 [11:03:20<19:00:55, 3.39it/s] 37%|███▋ | 139179/371472 [11:03:20<18:42:43, 3.45it/s] 37%|███▋ | 139180/371472 [11:03:21<18:59:05, 3.40it/s] {'loss': 3.0814, 'learning_rate': 6.630987523971643e-07, 'epoch': 5.99} + 37%|███▋ | 139180/371472 [11:03:21<18:59:05, 3.40it/s] 37%|███▋ | 139181/371472 [11:03:21<18:39:03, 3.46it/s] 37%|███▋ | 139182/371472 [11:03:21<18:37:58, 3.46it/s] 37%|███▋ | 139183/371472 [11:03:21<17:53:21, 3.61it/s] 37%|███▋ | 139184/371472 [11:03:22<18:11:51, 3.55it/s] 37%|███▋ | 139185/371472 [11:03:22<17:28:56, 3.69it/s] 37%|███▋ | 139186/371472 [11:03:22<18:04:34, 3.57it/s] 37%|███▋ | 139187/371472 [11:03:23<18:01:18, 3.58it/s] 37%|███▋ | 139188/371472 [11:03:23<19:31:27, 3.30it/s] 37%|███▋ | 139189/371472 [11:03:23<18:15:48, 3.53it/s] 37%|███▋ | 139190/371472 [11:03:23<17:15:13, 3.74it/s] 37%|███▋ | 139191/371472 [11:03:24<17:08:45, 3.76it/s] 37%|███▋ | 139192/371472 [11:03:24<17:27:04, 3.70it/s] 37%|███▋ | 139193/371472 [11:03:24<17:36:39, 3.66it/s] 37%|███▋ | 139194/371472 [11:03:24<18:16:09, 3.53it/s] 37%|███▋ | 139195/371472 [11:03:25<17:32:52, 3.68it/s] 37%|███▋ | 139196/371472 [11:03:25<17:27:51, 3.69it/s] 37%|███▋ | 139197/371472 [11:03:25<17:22:22, 3.71it/s] 37%|███▋ | 139198/371472 [11:03:26<17:23:06, 3.71it/s] 37%|███▋ | 139199/371472 [11:03:26<17:08:42, 3.76it/s] 37%|███▋ | 139200/371472 [11:03:26<16:34:55, 3.89it/s] {'loss': 3.0635, 'learning_rate': 6.630502704216854e-07, 'epoch': 6.0} + 37%|███▋ | 139200/371472 [11:03:26<16:34:55, 3.89it/s] 37%|███▋ | 139201/371472 [11:03:26<16:55:37, 3.81it/s] 37%|███▋ | 139202/371472 [11:03:27<17:01:24, 3.79it/s] 37%|███▋ | 139203/371472 [11:03:27<17:42:08, 3.64it/s] 37%|███▋ | 139204/371472 [11:03:27<17:43:12, 3.64it/s] 37%|███▋ | 139205/371472 [11:03:27<17:47:45, 3.63it/s] 37%|███▋ | 139206/371472 [11:03:28<16:58:56, 3.80it/s] 37%|███▋ | 139207/371472 [11:03:28<17:03:47, 3.78it/s] 37%|███▋ | 139208/371472 [11:03:28<16:58:52, 3.80it/s] 37%|███▋ | 139209/371472 [11:03:28<18:07:46, 3.56it/s] 37%|███▋ | 139210/371472 [11:03:29<17:27:54, 3.69it/s] 37%|███▋ | 139211/371472 [11:03:29<17:35:32, 3.67it/s] 37%|███▋ | 139212/371472 [11:03:29<17:33:36, 3.67it/s] 37%|███▋ | 139213/371472 [11:03:30<18:46:14, 3.44it/s] 37%|███▋ | 139214/371472 [11:03:30<18:03:16, 3.57it/s] 37%|███▋ | 139215/371472 [11:03:30<18:15:30, 3.53it/s] 37%|███▋ | 139216/371472 [11:03:30<18:47:39, 3.43it/s] 37%|███▋ | 139217/371472 [11:03:31<18:37:02, 3.47it/s] 37%|███▋ | 139218/371472 [11:03:31<17:49:51, 3.62it/s] 37%|███▋ | 139219/371472 [11:03:31<17:32:46, 3.68it/s] 37%|███▋ | 139220/371472 [11:03:32<16:55:18, 3.81it/s] {'loss': 3.1107, 'learning_rate': 6.630017884462065e-07, 'epoch': 6.0} + 37%|███▋ | 139220/371472 [11:03:32<16:55:18, 3.81it/s] 37%|███▋ | 139221/371472 [11:03:32<17:42:05, 3.64it/s] 37%|███▋ | 139222/371472 [11:03:32<17:08:16, 3.76it/s] 37%|███▋ | 139223/371472 [11:03:32<17:36:35, 3.66it/s] 37%|███▋ | 139224/371472 [11:03:33<17:29:09, 3.69it/s] 37%|███▋ | 139225/371472 [11:03:33<17:14:42, 3.74it/s] 37%|███▋ | 139226/371472 [11:03:33<17:18:17, 3.73it/s] 37%|███▋ | 139227/371472 [11:03:33<17:16:49, 3.73it/s] 37%|███▋ | 139228/371472 [11:03:34<19:09:19, 3.37it/s] 37%|███▋ | 139229/371472 [11:03:34<19:27:13, 3.32it/s] 37%|███▋ | 139230/371472 [11:03:34<18:37:54, 3.46it/s] 37%|███▋ | 139231/371472 [11:03:35<17:32:22, 3.68it/s] 37%|███▋ | 139232/371472 [11:03:35<17:48:41, 3.62it/s] 37%|███▋ | 139233/371472 [11:03:35<17:46:56, 3.63it/s] 37%|███▋ | 139234/371472 [11:03:36<20:38:57, 3.12it/s] 37%|███▋ | 139235/371472 [11:03:36<20:20:20, 3.17it/s] 37%|███▋ | 139236/371472 [11:03:36<19:07:32, 3.37it/s] 37%|███▋ | 139237/371472 [11:03:36<18:28:15, 3.49it/s] 37%|███▋ | 139238/371472 [11:03:37<17:55:26, 3.60it/s] 37%|███▋ | 139239/371472 [11:03:37<17:56:39, 3.59it/s] 37%|███▋ | 139240/371472 [11:03:37<17:07:08, 3.77it/s] {'loss': 2.8631, 'learning_rate': 6.629533064707276e-07, 'epoch': 6.0} + 37%|███▋ | 139240/371472 [11:03:37<17:07:08, 3.77it/s] 37%|███▋ | 139241/371472 [11:03:38<20:15:13, 3.19it/s] 37%|███▋ | 139242/371472 [11:03:38<19:15:12, 3.35it/s] 37%|███▋ | 139243/371472 [11:03:38<19:10:56, 3.36it/s] 37%|███▋ | 139244/371472 [11:03:38<20:02:51, 3.22it/s] 37%|███▋ | 139245/371472 [11:03:39<19:07:13, 3.37it/s] 37%|███▋ | 139246/371472 [11:03:39<19:06:06, 3.38it/s] 37%|███▋ | 139247/371472 [11:03:39<19:34:08, 3.30it/s] 37%|███▋ | 139248/371472 [11:03:40<20:29:30, 3.15it/s] 37%|███▋ | 139249/371472 [11:03:40<23:15:14, 2.77it/s] 37%|███▋ | 139250/371472 [11:03:40<21:51:39, 2.95it/s] 37%|███▋ | 139251/371472 [11:03:41<20:15:23, 3.18it/s] 37%|███▋ | 139252/371472 [11:03:41<18:52:40, 3.42it/s] 37%|███▋ | 139253/371472 [11:03:41<18:29:56, 3.49it/s] 37%|███▋ | 139254/371472 [11:03:41<18:03:40, 3.57it/s] 37%|███▋ | 139255/371472 [11:03:42<18:38:54, 3.46it/s] 37%|███▋ | 139256/371472 [11:03:42<18:52:40, 3.42it/s] 37%|███▋ | 139257/371472 [11:03:42<18:59:26, 3.40it/s] 37%|███▋ | 139258/371472 [11:03:43<18:09:46, 3.55it/s] 37%|███▋ | 139259/371472 [11:03:43<17:48:31, 3.62it/s] 37%|███▋ | 139260/371472 [11:03:43<18:12:32, 3.54it/s] {'loss': 3.101, 'learning_rate': 6.629048244952487e-07, 'epoch': 6.0} + 37%|███▋ | 139260/371472 [11:03:43<18:12:32, 3.54it/s] 37%|███▋ | 139261/371472 [11:03:44<18:41:01, 3.45it/s] 37%|███▋ | 139262/371472 [11:03:44<18:57:42, 3.40it/s] 37%|███▋ | 139263/371472 [11:03:44<18:11:28, 3.55it/s] 37%|███▋ | 139264/371472 [11:03:44<18:17:10, 3.53it/s] 37%|███▋ | 139265/371472 [11:03:45<17:32:09, 3.68it/s] 37%|███▋ | 139266/371472 [11:03:45<17:05:59, 3.77it/s] 37%|███▋ | 139267/371472 [11:03:45<17:13:06, 3.75it/s] 37%|███▋ | 139268/371472 [11:03:45<17:05:55, 3.77it/s] 37%|███▋ | 139269/371472 [11:03:46<17:01:15, 3.79it/s] 37%|███▋ | 139270/371472 [11:03:46<17:40:52, 3.65it/s] 37%|███▋ | 139271/371472 [11:03:46<17:21:59, 3.71it/s] 37%|███▋ | 139272/371472 [11:03:46<17:21:40, 3.72it/s] 37%|███▋ | 139273/371472 [11:03:47<18:56:24, 3.41it/s] 37%|███▋ | 139274/371472 [11:03:47<18:30:50, 3.48it/s] 37%|███▋ | 139275/371472 [11:03:47<18:28:03, 3.49it/s] 37%|███▋ | 139276/371472 [11:03:48<17:30:54, 3.68it/s] 37%|███▋ | 139277/371472 [11:03:48<17:17:54, 3.73it/s] 37%|███▋ | 139278/371472 [11:03:48<17:53:05, 3.61it/s] 37%|███▋ | 139279/371472 [11:03:48<17:23:23, 3.71it/s] 37%|███▋ | 139280/371472 [11:03:49<18:02:38, 3.57it/s] {'loss': 3.1452, 'learning_rate': 6.628563425197699e-07, 'epoch': 6.0} + 37%|███▋ | 139280/371472 [11:03:49<18:02:38, 3.57it/s] 37%|███▋ | 139281/371472 [11:03:49<19:35:26, 3.29it/s] 37%|███▋ | 139282/371472 [11:03:49<19:05:25, 3.38it/s] 37%|███▋ | 139283/371472 [11:03:50<18:24:07, 3.50it/s] 37%|███▋ | 139284/371472 [11:03:50<17:56:53, 3.59it/s] 37%|███▋ | 139285/371472 [11:03:50<19:29:23, 3.31it/s] 37%|███▋ | 139286/371472 [11:03:51<19:13:43, 3.35it/s] 37%|███▋ | 139287/371472 [11:03:51<18:15:26, 3.53it/s] 37%|███▋ | 139288/371472 [11:03:51<17:56:54, 3.59it/s] 37%|███▋ | 139289/371472 [11:03:51<17:19:59, 3.72it/s] 37%|███▋ | 139290/371472 [11:03:52<17:18:00, 3.73it/s] 37%|███▋ | 139291/371472 [11:03:52<16:40:07, 3.87it/s] 37%|███▋ | 139292/371472 [11:03:52<17:05:36, 3.77it/s] 37%|███▋ | 139293/371472 [11:03:52<19:02:55, 3.39it/s] 37%|███▋ | 139294/371472 [11:03:53<18:55:47, 3.41it/s] 37%|███▋ | 139295/371472 [11:03:53<20:39:53, 3.12it/s] 37%|███▋ | 139296/371472 [11:03:54<22:09:43, 2.91it/s] 37%|███▋ | 139297/371472 [11:03:54<21:18:00, 3.03it/s] 37%|███▋ | 139298/371472 [11:03:54<20:34:27, 3.13it/s] 37%|███▋ | 139299/371472 [11:03:54<19:45:32, 3.26it/s] 37%|███▋ | 139300/371472 [11:03:55<19:14:56, 3.35it/s] {'loss': 3.2317, 'learning_rate': 6.62807860544291e-07, 'epoch': 6.0} + 37%|███▋ | 139300/371472 [11:03:55<19:14:56, 3.35it/s] 37%|███▋ | 139301/371472 [11:03:55<19:55:50, 3.24it/s] 38%|███▊ | 139302/371472 [11:03:55<20:07:19, 3.21it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 38%|███▊ | 139303/371472 [11:04:21<507:32:18, 7.87s/it] 38%|███▊ | 139304/371472 [11:04:21<363:33:56, 5.64s/it] 38%|███▊ | 139305/371472 [11:04:22<260:31:26, 4.04s/it] 38%|███▊ | 139306/371472 [11:04:22<187:22:21, 2.91s/it] 38%|███▊ | 139307/371472 [11:04:22<136:48:13, 2.12s/it] 38%|███▊ | 139308/371472 [11:04:22<100:26:25, 1.56s/it] 38%|███▊ | 139309/371472 [11:04:23<75:44:41, 1.17s/it] 38%|███▊ | 139310/371472 [11:04:23<58:09:03, 1.11it/s] 38%|███▊ | 139311/371472 [11:04:23<46:08:37, 1.40it/s] 38%|███▊ | 139312/371472 [11:04:23<37:15:15, 1.73it/s] 38%|███▊ | 139313/371472 [11:04:24<32:01:02, 2.01it/s] 38%|███▊ | 139314/371472 [11:04:24<28:04:03, 2.30it/s] 38%|███▊ | 139315/371472 [11:04:24<25:07:49, 2.57it/s] 38%|███▊ | 139316/371472 [11:04:25<24:09:22, 2.67it/s] 38%|███▊ | 139317/371472 [11:04:25<22:33:44, 2.86it/s] 38%|███▊ | 139318/371472 [11:04:25<20:58:21, 3.07it/s] 38%|███▊ | 139319/371472 [11:04:25<19:37:15, 3.29it/s] 38%|███▊ | 139320/371472 [11:04:26<18:29:27, 3.49it/s] {'loss': 3.1854, 'learning_rate': 6.627593785688121e-07, 'epoch': 6.0} + 38%|███▊ | 139320/371472 [11:04:26<18:29:27, 3.49it/s] 38%|███▊ | 139321/371472 [11:04:26<18:15:48, 3.53it/s] 38%|███▊ | 139322/371472 [11:04:26<17:25:05, 3.70it/s] 38%|███▊ | 139323/371472 [11:04:27<17:37:24, 3.66it/s] 38%|███▊ | 139324/371472 [11:04:27<17:33:15, 3.67it/s] 38%|███▊ | 139325/371472 [11:04:27<17:19:05, 3.72it/s] 38%|███▊ | 139326/371472 [11:04:27<18:21:44, 3.51it/s] 38%|███▊ | 139327/371472 [11:04:28<18:29:26, 3.49it/s] 38%|███▊ | 139328/371472 [11:04:28<17:58:38, 3.59it/s] 38%|███▊ | 139329/371472 [11:04:28<19:57:29, 3.23it/s] 38%|███▊ | 139330/371472 [11:04:29<18:23:08, 3.51it/s] 38%|███▊ | 139331/371472 [11:04:29<17:49:32, 3.62it/s] 38%|███▊ | 139332/371472 [11:04:29<17:33:21, 3.67it/s] 38%|███▊ | 139333/371472 [11:04:29<18:31:03, 3.48it/s] 38%|███▊ | 139334/371472 [11:04:30<18:10:32, 3.55it/s] 38%|███▊ | 139335/371472 [11:04:30<17:52:59, 3.61it/s] 38%|███▊ | 139336/371472 [11:04:30<17:38:16, 3.66it/s] 38%|███▊ | 139337/371472 [11:04:30<17:58:02, 3.59it/s] 38%|███▊ | 139338/371472 [11:04:31<17:34:55, 3.67it/s] 38%|███▊ | 139339/371472 [11:04:31<18:14:12, 3.54it/s] 38%|███▊ | 139340/371472 [11:04:31<18:07:24, 3.56it/s] {'loss': 3.0666, 'learning_rate': 6.627108965933331e-07, 'epoch': 6.0} + 38%|███▊ | 139340/371472 [11:04:31<18:07:24, 3.56it/s] 38%|███▊ | 139341/371472 [11:04:32<18:07:07, 3.56it/s] 38%|███▊ | 139342/371472 [11:04:32<17:34:52, 3.67it/s] 38%|███▊ | 139343/371472 [11:04:32<17:19:13, 3.72it/s] 38%|███▊ | 139344/371472 [11:04:32<16:45:02, 3.85it/s] 38%|███▊ | 139345/371472 [11:04:33<16:43:43, 3.85it/s] 38%|███▊ | 139346/371472 [11:04:33<16:37:19, 3.88it/s] 38%|███▊ | 139347/371472 [11:04:33<16:03:55, 4.01it/s] 38%|███▊ | 139348/371472 [11:04:33<17:10:12, 3.76it/s] 38%|███▊ | 139349/371472 [11:04:34<16:44:54, 3.85it/s] 38%|███▊ | 139350/371472 [11:04:34<16:17:09, 3.96it/s] 38%|███▊ | 139351/371472 [11:04:34<16:53:49, 3.82it/s] 38%|███▊ | 139352/371472 [11:04:34<16:47:46, 3.84it/s] 38%|███▊ | 139353/371472 [11:04:35<18:45:56, 3.44it/s] 38%|███▊ | 139354/371472 [11:04:35<18:37:40, 3.46it/s] 38%|███▊ | 139355/371472 [11:04:35<17:47:56, 3.62it/s] 38%|███▊ | 139356/371472 [11:04:36<17:10:27, 3.75it/s] 38%|███▊ | 139357/371472 [11:04:36<17:18:52, 3.72it/s] 38%|███▊ | 139358/371472 [11:04:36<17:05:36, 3.77it/s] 38%|███▊ | 139359/371472 [11:04:36<16:54:28, 3.81it/s] 38%|███▊ | 139360/371472 [11:04:37<16:49:40, 3.83it/s] {'loss': 3.1272, 'learning_rate': 6.626624146178543e-07, 'epoch': 6.0} + 38%|███▊ | 139360/371472 [11:04:37<16:49:40, 3.83it/s] 38%|███▊ | 139361/371472 [11:04:37<16:56:06, 3.81it/s] 38%|███▊ | 139362/371472 [11:04:37<16:51:35, 3.82it/s] 38%|███▊ | 139363/371472 [11:04:37<17:50:38, 3.61it/s] 38%|███▊ | 139364/371472 [11:04:38<18:00:17, 3.58it/s] 38%|███▊ | 139365/371472 [11:04:38<17:49:30, 3.62it/s] 38%|███▊ | 139366/371472 [11:04:38<17:54:05, 3.60it/s] 38%|███▊ | 139367/371472 [11:04:39<18:35:21, 3.47it/s] 38%|███▊ | 139368/371472 [11:04:39<17:44:10, 3.64it/s] 38%|███▊ | 139369/371472 [11:04:39<17:50:49, 3.61it/s] 38%|███▊ | 139370/371472 [11:04:39<19:05:36, 3.38it/s] 38%|███▊ | 139371/371472 [11:04:40<19:59:46, 3.22it/s] 38%|███▊ | 139372/371472 [11:04:40<23:08:34, 2.79it/s] 38%|███▊ | 139373/371472 [11:04:41<21:22:31, 3.02it/s] 38%|███▊ | 139374/371472 [11:04:41<20:11:42, 3.19it/s] 38%|███▊ | 139375/371472 [11:04:41<19:19:32, 3.34it/s] 38%|███▊ | 139376/371472 [11:04:41<18:52:28, 3.42it/s] 38%|███▊ | 139377/371472 [11:04:42<18:28:25, 3.49it/s] 38%|███▊ | 139378/371472 [11:04:42<19:03:45, 3.38it/s] 38%|███▊ | 139379/371472 [11:04:42<18:39:40, 3.45it/s] 38%|███▊ | 139380/371472 [11:04:42<18:36:26, 3.46it/s] {'loss': 3.2, 'learning_rate': 6.626139326423754e-07, 'epoch': 6.0} + 38%|███▊ | 139380/371472 [11:04:42<18:36:26, 3.46it/s] 38%|███▊ | 139381/371472 [11:04:43<18:46:17, 3.43it/s] 38%|███▊ | 139382/371472 [11:04:43<18:46:19, 3.43it/s] 38%|███▊ | 139383/371472 [11:04:43<18:59:03, 3.40it/s] 38%|███▊ | 139384/371472 [11:04:44<18:13:27, 3.54it/s] 38%|███▊ | 139385/371472 [11:04:44<19:21:12, 3.33it/s] 38%|███▊ | 139386/371472 [11:04:44<19:29:05, 3.31it/s] 38%|███▊ | 139387/371472 [11:04:45<18:44:35, 3.44it/s] 38%|███▊ | 139388/371472 [11:04:45<18:38:47, 3.46it/s] 38%|███▊ | 139389/371472 [11:04:45<18:23:05, 3.51it/s] 38%|███▊ | 139390/371472 [11:04:45<18:48:21, 3.43it/s] 38%|███▊ | 139391/371472 [11:04:46<20:19:52, 3.17it/s] 38%|███▊ | 139392/371472 [11:04:46<19:19:00, 3.34it/s] 38%|███▊ | 139393/371472 [11:04:46<18:35:44, 3.47it/s] 38%|███▊ | 139394/371472 [11:04:47<17:48:48, 3.62it/s] 38%|███▊ | 139395/371472 [11:04:47<17:45:40, 3.63it/s] 38%|███▊ | 139396/371472 [11:04:47<16:54:21, 3.81it/s] 38%|███▊ | 139397/371472 [11:04:47<17:24:46, 3.70it/s] 38%|███▊ | 139398/371472 [11:04:48<17:20:30, 3.72it/s] 38%|███▊ | 139399/371472 [11:04:48<17:43:39, 3.64it/s] 38%|███▊ | 139400/371472 [11:04:48<17:48:25, 3.62it/s] {'loss': 3.1094, 'learning_rate': 6.625654506668965e-07, 'epoch': 6.0} + 38%|███▊ | 139400/371472 [11:04:48<17:48:25, 3.62it/s] 38%|███▊ | 139401/371472 [11:04:49<18:33:02, 3.48it/s] 38%|███▊ | 139402/371472 [11:04:49<18:01:07, 3.58it/s] 38%|███▊ | 139403/371472 [11:04:49<17:32:59, 3.67it/s] 38%|███▊ | 139404/371472 [11:04:49<17:14:28, 3.74it/s] 38%|███▊ | 139405/371472 [11:04:50<16:57:45, 3.80it/s] 38%|███▊ | 139406/371472 [11:04:50<17:38:45, 3.65it/s] 38%|███▊ | 139407/371472 [11:04:50<17:22:11, 3.71it/s] 38%|███▊ | 139408/371472 [11:04:50<17:09:21, 3.76it/s] 38%|███▊ | 139409/371472 [11:04:51<17:25:19, 3.70it/s] 38%|███▊ | 139410/371472 [11:04:51<18:15:58, 3.53it/s] 38%|███▊ | 139411/371472 [11:04:51<18:03:38, 3.57it/s] 38%|███▊ | 139412/371472 [11:04:52<18:09:13, 3.55it/s] 38%|███▊ | 139413/371472 [11:04:52<17:41:12, 3.64it/s] 38%|███▊ | 139414/371472 [11:04:52<16:57:49, 3.80it/s] 38%|███▊ | 139415/371472 [11:04:52<16:46:27, 3.84it/s] 38%|███▊ | 139416/371472 [11:04:53<17:41:25, 3.64it/s] 38%|███▊ | 139417/371472 [11:04:53<17:58:16, 3.59it/s] 38%|███▊ | 139418/371472 [11:04:53<17:49:02, 3.62it/s] 38%|███▊ | 139419/371472 [11:04:53<17:54:33, 3.60it/s] 38%|███▊ | 139420/371472 [11:04:54<17:36:57, 3.66it/s] {'loss': 3.1583, 'learning_rate': 6.625169686914176e-07, 'epoch': 6.01} + 38%|███▊ | 139420/371472 [11:04:54<17:36:57, 3.66it/s] 38%|███▊ | 139421/371472 [11:04:54<18:07:19, 3.56it/s] 38%|███▊ | 139422/371472 [11:04:54<17:58:35, 3.59it/s] 38%|███▊ | 139423/371472 [11:04:55<18:53:59, 3.41it/s] 38%|███▊ | 139424/371472 [11:04:55<18:32:09, 3.48it/s] 38%|███▊ | 139425/371472 [11:04:55<18:13:58, 3.54it/s] 38%|███▊ | 139426/371472 [11:04:55<18:27:42, 3.49it/s] 38%|███▊ | 139427/371472 [11:04:56<18:08:28, 3.55it/s] 38%|███▊ | 139428/371472 [11:04:56<18:49:42, 3.42it/s] 38%|███▊ | 139429/371472 [11:04:56<18:58:07, 3.40it/s] 38%|███▊ | 139430/371472 [11:04:57<18:05:48, 3.56it/s] 38%|███▊ | 139431/371472 [11:04:57<18:09:32, 3.55it/s] 38%|███▊ | 139432/371472 [11:04:57<17:59:00, 3.58it/s] 38%|███▊ | 139433/371472 [11:04:57<17:17:52, 3.73it/s] 38%|███▊ | 139434/371472 [11:04:58<17:31:53, 3.68it/s] 38%|███▊ | 139435/371472 [11:04:58<18:26:14, 3.50it/s] 38%|███▊ | 139436/371472 [11:04:58<19:08:29, 3.37it/s] 38%|███▊ | 139437/371472 [11:04:59<19:13:07, 3.35it/s] 38%|███▊ | 139438/371472 [11:04:59<19:09:27, 3.36it/s] 38%|███▊ | 139439/371472 [11:04:59<18:59:22, 3.39it/s] 38%|███▊ | 139440/371472 [11:04:59<19:51:21, 3.25it/s] {'loss': 3.0964, 'learning_rate': 6.624684867159388e-07, 'epoch': 6.01} + 38%|███▊ | 139440/371472 [11:04:59<19:51:21, 3.25it/s] 38%|███▊ | 139441/371472 [11:05:00<19:36:54, 3.29it/s] 38%|███▊ | 139442/371472 [11:05:00<19:32:27, 3.30it/s] 38%|███▊ | 139443/371472 [11:05:00<18:32:11, 3.48it/s] 38%|███▊ | 139444/371472 [11:05:01<18:38:01, 3.46it/s] 38%|███▊ | 139445/371472 [11:05:01<17:44:03, 3.63it/s] 38%|███▊ | 139446/371472 [11:05:01<17:03:27, 3.78it/s] 38%|███▊ | 139447/371472 [11:05:01<17:17:07, 3.73it/s] 38%|███▊ | 139448/371472 [11:05:02<17:06:49, 3.77it/s] 38%|███▊ | 139449/371472 [11:05:02<18:14:22, 3.53it/s] 38%|███▊ | 139450/371472 [11:05:02<18:54:18, 3.41it/s] 38%|███▊ | 139451/371472 [11:05:03<19:55:20, 3.24it/s] 38%|███▊ | 139452/371472 [11:05:03<19:29:05, 3.31it/s] 38%|███▊ | 139453/371472 [11:05:03<19:00:52, 3.39it/s] 38%|███▊ | 139454/371472 [11:05:03<18:15:46, 3.53it/s] 38%|███▊ | 139455/371472 [11:05:04<18:01:25, 3.58it/s] 38%|███▊ | 139456/371472 [11:05:04<18:46:57, 3.43it/s] 38%|███▊ | 139457/371472 [11:05:04<19:03:02, 3.38it/s] 38%|███▊ | 139458/371472 [11:05:05<18:42:22, 3.45it/s] 38%|███▊ | 139459/371472 [11:05:05<18:19:37, 3.52it/s] 38%|███▊ | 139460/371472 [11:05:05<18:24:39, 3.50it/s] {'loss': 3.1498, 'learning_rate': 6.624200047404596e-07, 'epoch': 6.01} + 38%|███▊ | 139460/371472 [11:05:05<18:24:39, 3.50it/s] 38%|███▊ | 139461/371472 [11:05:05<18:46:12, 3.43it/s] 38%|███▊ | 139462/371472 [11:05:06<18:27:44, 3.49it/s] 38%|███▊ | 139463/371472 [11:05:06<18:03:21, 3.57it/s] 38%|███▊ | 139464/371472 [11:05:06<19:06:40, 3.37it/s] 38%|███▊ | 139465/371472 [11:05:07<19:00:45, 3.39it/s] 38%|███▊ | 139466/371472 [11:05:07<20:14:27, 3.18it/s] 38%|███▊ | 139467/371472 [11:05:07<19:04:24, 3.38it/s] 38%|███▊ | 139468/371472 [11:05:08<18:42:21, 3.45it/s] 38%|███▊ | 139469/371472 [11:05:08<18:45:54, 3.43it/s] 38%|███▊ | 139470/371472 [11:05:08<21:31:25, 2.99it/s] 38%|███▊ | 139471/371472 [11:05:09<20:27:48, 3.15it/s] 38%|███▊ | 139472/371472 [11:05:09<20:02:12, 3.22it/s] 38%|███▊ | 139473/371472 [11:05:09<19:15:53, 3.35it/s] 38%|███▊ | 139474/371472 [11:05:09<19:08:34, 3.37it/s] 38%|███▊ | 139475/371472 [11:05:10<19:27:51, 3.31it/s] 38%|███▊ | 139476/371472 [11:05:10<18:47:52, 3.43it/s] 38%|███▊ | 139477/371472 [11:05:10<19:24:34, 3.32it/s] 38%|███▊ | 139478/371472 [11:05:11<20:04:00, 3.21it/s] 38%|███▊ | 139479/371472 [11:05:11<19:09:07, 3.36it/s] 38%|███▊ | 139480/371472 [11:05:11<20:01:51, 3.22it/s] {'loss': 3.2131, 'learning_rate': 6.623715227649808e-07, 'epoch': 6.01} + 38%|███▊ | 139480/371472 [11:05:11<20:01:51, 3.22it/s] 38%|███▊ | 139481/371472 [11:05:12<19:03:31, 3.38it/s] 38%|███▊ | 139482/371472 [11:05:12<18:29:07, 3.49it/s] 38%|███▊ | 139483/371472 [11:05:12<18:07:45, 3.55it/s] 38%|███▊ | 139484/371472 [11:05:12<18:34:57, 3.47it/s] 38%|███▊ | 139485/371472 [11:05:13<18:19:16, 3.52it/s] 38%|███▊ | 139486/371472 [11:05:13<17:38:46, 3.65it/s] 38%|███▊ | 139487/371472 [11:05:13<18:30:43, 3.48it/s] 38%|███▊ | 139488/371472 [11:05:13<18:30:03, 3.48it/s] 38%|███▊ | 139489/371472 [11:05:14<18:06:46, 3.56it/s] 38%|███▊ | 139490/371472 [11:05:14<18:50:31, 3.42it/s] 38%|███▊ | 139491/371472 [11:05:14<18:01:59, 3.57it/s] 38%|███▊ | 139492/371472 [11:05:15<19:33:47, 3.29it/s] 38%|███▊ | 139493/371472 [11:05:15<19:02:54, 3.38it/s] 38%|███▊ | 139494/371472 [11:05:15<18:51:34, 3.42it/s] 38%|███▊ | 139495/371472 [11:05:16<18:17:03, 3.52it/s] 38%|███▊ | 139496/371472 [11:05:16<18:17:15, 3.52it/s] 38%|███▊ | 139497/371472 [11:05:16<17:49:37, 3.61it/s] 38%|███▊ | 139498/371472 [11:05:16<19:39:15, 3.28it/s] 38%|███▊ | 139499/371472 [11:05:17<20:24:00, 3.16it/s] 38%|███▊ | 139500/371472 [11:05:17<19:10:21, 3.36it/s] {'loss': 3.0729, 'learning_rate': 6.62323040789502e-07, 'epoch': 6.01} + 38%|███▊ | 139500/371472 [11:05:17<19:10:21, 3.36it/s] 38%|███▊ | 139501/371472 [11:05:17<19:17:33, 3.34it/s] 38%|███▊ | 139502/371472 [11:05:18<18:57:50, 3.40it/s] 38%|███▊ | 139503/371472 [11:05:18<18:50:41, 3.42it/s] 38%|███▊ | 139504/371472 [11:05:18<18:43:56, 3.44it/s] 38%|███▊ | 139505/371472 [11:05:18<18:56:50, 3.40it/s] 38%|███▊ | 139506/371472 [11:05:19<18:23:36, 3.50it/s] 38%|███▊ | 139507/371472 [11:05:19<17:38:08, 3.65it/s] 38%|███▊ | 139508/371472 [11:05:19<18:16:28, 3.53it/s] 38%|███▊ | 139509/371472 [11:05:20<17:36:33, 3.66it/s] \ No newline at end of file