diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -3886,3 +3886,1152 @@ Time to load utils op: 0.0003948211669921875 seconds [2022-12-17 07:00:04,837] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-17 07:00:04,837] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-17 07:00:04,837] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +[2022-12-17 07:02:11,279] [INFO] [timer.py:197:stop] 0/6002, RunningAvgSamplesPerSec=6.3329282976802075, CurrSamplesPerSec=5.409848420510742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:02:22,520] [INFO] [timer.py:197:stop] 0/6004, RunningAvgSamplesPerSec=6.3329409702913235, CurrSamplesPerSec=5.727924201320441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:02:33,779] [INFO] [timer.py:197:stop] 0/6006, RunningAvgSamplesPerSec=6.332945347375377, CurrSamplesPerSec=5.715656804046321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:02:45,055] [INFO] [timer.py:197:stop] 0/6008, RunningAvgSamplesPerSec=6.332949877724141, CurrSamplesPerSec=5.705035329354477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:02:56,286] [INFO] [timer.py:197:stop] 0/6010, RunningAvgSamplesPerSec=6.332960763399706, CurrSamplesPerSec=5.712214510521705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:03:07,626] [INFO] [timer.py:197:stop] 0/6012, RunningAvgSamplesPerSec=6.332954862322034, CurrSamplesPerSec=5.664988709294761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:03:19,134] [INFO] [timer.py:197:stop] 0/6014, RunningAvgSamplesPerSec=6.332956478155806, CurrSamplesPerSec=5.691613234976137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:03:30,422] [INFO] [timer.py:197:stop] 0/6016, RunningAvgSamplesPerSec=6.332958805969218, CurrSamplesPerSec=5.706581425626131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:03:41,720] [INFO] [timer.py:197:stop] 0/6018, RunningAvgSamplesPerSec=6.332955750474672, CurrSamplesPerSec=5.677985828400935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:03:53,059] [INFO] [logging.py:68:log_dist] [Rank 0] step=3010, skipped=5, lr=[4.4355555555555555e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:03:53,061] [INFO] [timer.py:197:stop] 0/6020, RunningAvgSamplesPerSec=6.332947512441389, CurrSamplesPerSec=5.67887616091657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:04:04,348] [INFO] [timer.py:197:stop] 0/6022, RunningAvgSamplesPerSec=6.332950262424399, CurrSamplesPerSec=5.712307379262475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:04:15,663] [INFO] [timer.py:197:stop] 0/6024, RunningAvgSamplesPerSec=6.33294522532113, CurrSamplesPerSec=5.6699262183186665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:04:26,966] [INFO] [timer.py:197:stop] 0/6026, RunningAvgSamplesPerSec=6.332944727752335, CurrSamplesPerSec=5.705308394311921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:04:38,261] [INFO] [timer.py:197:stop] 0/6028, RunningAvgSamplesPerSec=6.332945641278926, CurrSamplesPerSec=5.7136246414556835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:04:49,577] [INFO] [timer.py:197:stop] 0/6030, RunningAvgSamplesPerSec=6.332943066924493, CurrSamplesPerSec=5.7022036403463865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:05:00,866] [INFO] [timer.py:197:stop] 0/6032, RunningAvgSamplesPerSec=6.332945423624379, CurrSamplesPerSec=5.714399670907678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:05:12,187] [INFO] [timer.py:197:stop] 0/6034, RunningAvgSamplesPerSec=6.332938365767992, CurrSamplesPerSec=5.655726374511836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:05:23,507] [INFO] [timer.py:197:stop] 0/6036, RunningAvgSamplesPerSec=6.332939157128889, CurrSamplesPerSec=5.696587613256186, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:05:34,745] [INFO] [timer.py:197:stop] 0/6038, RunningAvgSamplesPerSec=6.332946002518051, CurrSamplesPerSec=5.708284216463061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:05:46,032] [INFO] [logging.py:68:log_dist] [Rank 0] step=3020, skipped=5, lr=[4.413333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:05:46,034] [INFO] [timer.py:197:stop] 0/6040, RunningAvgSamplesPerSec=6.332948857092477, CurrSamplesPerSec=5.705083344211374, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:05:57,250] [INFO] [timer.py:197:stop] 0/6042, RunningAvgSamplesPerSec=6.332963769215607, CurrSamplesPerSec=5.7226780454749555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:06:08,579] [INFO] [timer.py:197:stop] 0/6044, RunningAvgSamplesPerSec=6.33295502373841, CurrSamplesPerSec=5.639287550383446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:06:19,844] [INFO] [timer.py:197:stop] 0/6046, RunningAvgSamplesPerSec=6.332962754710209, CurrSamplesPerSec=5.697369877713746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:06:31,099] [INFO] [timer.py:197:stop] 0/6048, RunningAvgSamplesPerSec=6.3329694666057845, CurrSamplesPerSec=5.7139761275044565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:06:42,329] [INFO] [timer.py:197:stop] 0/6050, RunningAvgSamplesPerSec=6.332981066028764, CurrSamplesPerSec=5.725672752341051, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.001, 'learning_rate': 4.402222222222223e-06, 'epoch': 12.82} +[2022-12-17 07:06:53,602] [INFO] [timer.py:197:stop] 0/6052, RunningAvgSamplesPerSec=6.33299022450167, CurrSamplesPerSec=5.722167644273479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:07:04,833] [INFO] [timer.py:197:stop] 0/6054, RunningAvgSamplesPerSec=6.333001195047098, CurrSamplesPerSec=5.7341029003935455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:07:16,058] [INFO] [timer.py:197:stop] 0/6056, RunningAvgSamplesPerSec=6.333013578299217, CurrSamplesPerSec=5.719579224118402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:07:27,356] [INFO] [timer.py:197:stop] 0/6058, RunningAvgSamplesPerSec=6.33301585082342, CurrSamplesPerSec=5.710740198407118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:07:38,636] [INFO] [logging.py:68:log_dist] [Rank 0] step=3030, skipped=5, lr=[4.391111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:07:38,637] [INFO] [timer.py:197:stop] 0/6060, RunningAvgSamplesPerSec=6.333019960453741, CurrSamplesPerSec=5.7041610183311, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:07:49,915] [INFO] [timer.py:197:stop] 0/6062, RunningAvgSamplesPerSec=6.333021954913878, CurrSamplesPerSec=5.697483305755978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:08:01,206] [INFO] [timer.py:197:stop] 0/6064, RunningAvgSamplesPerSec=6.33302445189601, CurrSamplesPerSec=5.690578240712397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:08:12,512] [INFO] [timer.py:197:stop] 0/6066, RunningAvgSamplesPerSec=6.333023758845632, CurrSamplesPerSec=5.696591481732633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:08:23,807] [INFO] [timer.py:197:stop] 0/6068, RunningAvgSamplesPerSec=6.33302566180591, CurrSamplesPerSec=5.695524948344975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:08:35,246] [INFO] [timer.py:197:stop] 0/6070, RunningAvgSamplesPerSec=6.333017665904463, CurrSamplesPerSec=5.65430465557005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:08:46,519] [INFO] [timer.py:197:stop] 0/6072, RunningAvgSamplesPerSec=6.333020403951812, CurrSamplesPerSec=5.721240762130593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:08:57,822] [INFO] [timer.py:197:stop] 0/6074, RunningAvgSamplesPerSec=6.333017693939095, CurrSamplesPerSec=5.673858777116046, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:09:09,109] [INFO] [timer.py:197:stop] 0/6076, RunningAvgSamplesPerSec=6.333026959977511, CurrSamplesPerSec=5.734302071672384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:09:20,412] [INFO] [timer.py:197:stop] 0/6078, RunningAvgSamplesPerSec=6.3330330568762045, CurrSamplesPerSec=5.706936413481388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:09:31,826] [INFO] [logging.py:68:log_dist] [Rank 0] step=3040, skipped=5, lr=[4.368888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:09:31,828] [INFO] [timer.py:197:stop] 0/6080, RunningAvgSamplesPerSec=6.333031876153911, CurrSamplesPerSec=5.6814634566546145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:09:43,109] [INFO] [timer.py:197:stop] 0/6082, RunningAvgSamplesPerSec=6.333036190320956, CurrSamplesPerSec=5.707240238570252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:09:54,383] [INFO] [timer.py:197:stop] 0/6084, RunningAvgSamplesPerSec=6.3330414775741195, CurrSamplesPerSec=5.690312615092251, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:10:05,694] [INFO] [timer.py:197:stop] 0/6086, RunningAvgSamplesPerSec=6.333040218328877, CurrSamplesPerSec=5.67669863130965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:10:17,049] [INFO] [timer.py:197:stop] 0/6088, RunningAvgSamplesPerSec=6.333038286994798, CurrSamplesPerSec=5.681935592463785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:10:28,450] [INFO] [timer.py:197:stop] 0/6090, RunningAvgSamplesPerSec=6.33303059688638, CurrSamplesPerSec=5.686027674333798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:10:39,797] [INFO] [timer.py:197:stop] 0/6092, RunningAvgSamplesPerSec=6.333025536818845, CurrSamplesPerSec=5.65996479562066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:10:51,237] [INFO] [timer.py:197:stop] 0/6094, RunningAvgSamplesPerSec=6.333013486821785, CurrSamplesPerSec=5.6392449014850685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:11:02,636] [INFO] [timer.py:197:stop] 0/6096, RunningAvgSamplesPerSec=6.333009741623534, CurrSamplesPerSec=5.685992746360228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:11:14,051] [INFO] [timer.py:197:stop] 0/6098, RunningAvgSamplesPerSec=6.333002641181561, CurrSamplesPerSec=5.677886386032248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:11:25,430] [INFO] [logging.py:68:log_dist] [Rank 0] step=3050, skipped=5, lr=[4.346666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:11:25,431] [INFO] [timer.py:197:stop] 0/6100, RunningAvgSamplesPerSec=6.333004553516022, CurrSamplesPerSec=5.699673908755347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0011, 'learning_rate': 4.346666666666667e-06, 'epoch': 12.92} +[2022-12-17 07:11:36,708] [INFO] [timer.py:197:stop] 0/6102, RunningAvgSamplesPerSec=6.3330077852363305, CurrSamplesPerSec=5.688169699341959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:11:48,001] [INFO] [timer.py:197:stop] 0/6104, RunningAvgSamplesPerSec=6.333009660501565, CurrSamplesPerSec=5.695481202896487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:11:59,318] [INFO] [timer.py:197:stop] 0/6106, RunningAvgSamplesPerSec=6.333012871056986, CurrSamplesPerSec=5.700673231938462, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:12:10,622] [INFO] [timer.py:197:stop] 0/6108, RunningAvgSamplesPerSec=6.333013058024148, CurrSamplesPerSec=5.679696830576498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:12:21,970] [INFO] [timer.py:197:stop] 0/6110, RunningAvgSamplesPerSec=6.333018333144457, CurrSamplesPerSec=5.708892186073563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:12:33,264] [INFO] [timer.py:197:stop] 0/6112, RunningAvgSamplesPerSec=6.333019560946497, CurrSamplesPerSec=5.696909198295653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:12:44,676] [INFO] [timer.py:197:stop] 0/6114, RunningAvgSamplesPerSec=6.333009485645984, CurrSamplesPerSec=5.6825918544581535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:12:56,137] [INFO] [timer.py:197:stop] 0/6116, RunningAvgSamplesPerSec=6.333001857085381, CurrSamplesPerSec=5.661122634552742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:13:07,404] [INFO] [timer.py:197:stop] 0/6118, RunningAvgSamplesPerSec=6.333009218347228, CurrSamplesPerSec=5.716704107126596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:13:18,808] [INFO] [logging.py:68:log_dist] [Rank 0] step=3060, skipped=5, lr=[4.324444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:13:18,810] [INFO] [timer.py:197:stop] 0/6120, RunningAvgSamplesPerSec=6.33299932992847, CurrSamplesPerSec=5.631900206846387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:13:30,131] [INFO] [timer.py:197:stop] 0/6122, RunningAvgSamplesPerSec=6.333000625987813, CurrSamplesPerSec=5.7233598609995715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:13:41,492] [INFO] [timer.py:197:stop] 0/6124, RunningAvgSamplesPerSec=6.332999071187653, CurrSamplesPerSec=5.69576326408152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:13:52,896] [INFO] [timer.py:197:stop] 0/6126, RunningAvgSamplesPerSec=6.333000799392758, CurrSamplesPerSec=5.694567535841868, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:14:04,138] [INFO] [timer.py:197:stop] 0/6128, RunningAvgSamplesPerSec=6.333013144982671, CurrSamplesPerSec=5.739862152879895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:14:15,464] [INFO] [timer.py:197:stop] 0/6130, RunningAvgSamplesPerSec=6.333011721516519, CurrSamplesPerSec=5.682956616907704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:14:26,778] [INFO] [timer.py:197:stop] 0/6132, RunningAvgSamplesPerSec=6.333017077437072, CurrSamplesPerSec=5.689464033835547, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:14:38,143] [INFO] [timer.py:197:stop] 0/6134, RunningAvgSamplesPerSec=6.333023205191618, CurrSamplesPerSec=5.701417626140478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:14:46,638] [INFO] [timer.py:197:stop] 0/6136, RunningAvgSamplesPerSec=6.333530345771324, CurrSamplesPerSec=10.181979836122121, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:14:57,900] [INFO] [timer.py:197:stop] 0/6138, RunningAvgSamplesPerSec=6.3335354485394, CurrSamplesPerSec=5.704494126868448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:15:09,204] [INFO] [logging.py:68:log_dist] [Rank 0] step=3070, skipped=5, lr=[4.302222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:15:09,205] [INFO] [timer.py:197:stop] 0/6140, RunningAvgSamplesPerSec=6.333539101452951, CurrSamplesPerSec=5.708758392510282, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:15:20,622] [INFO] [timer.py:197:stop] 0/6142, RunningAvgSamplesPerSec=6.333530860653627, CurrSamplesPerSec=5.651146410158156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:15:32,024] [INFO] [timer.py:197:stop] 0/6144, RunningAvgSamplesPerSec=6.333533492664965, CurrSamplesPerSec=5.692809654408539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:15:43,329] [INFO] [timer.py:197:stop] 0/6146, RunningAvgSamplesPerSec=6.333533025155378, CurrSamplesPerSec=5.6757631381319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:15:54,648] [INFO] [timer.py:197:stop] 0/6148, RunningAvgSamplesPerSec=6.333533683356346, CurrSamplesPerSec=5.689169332882951, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:16:05,989] [INFO] [timer.py:197:stop] 0/6150, RunningAvgSamplesPerSec=6.333535642050272, CurrSamplesPerSec=5.696211429020808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0011, 'learning_rate': 4.291111111111112e-06, 'epoch': 13.03} +[2022-12-17 07:16:17,328] [INFO] [timer.py:197:stop] 0/6152, RunningAvgSamplesPerSec=6.333536210137741, CurrSamplesPerSec=5.689893358266075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:16:28,635] [INFO] [timer.py:197:stop] 0/6154, RunningAvgSamplesPerSec=6.333535803272801, CurrSamplesPerSec=5.704164169830742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:16:39,912] [INFO] [timer.py:197:stop] 0/6156, RunningAvgSamplesPerSec=6.333549464531601, CurrSamplesPerSec=5.733308802683141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:16:51,198] [INFO] [timer.py:197:stop] 0/6158, RunningAvgSamplesPerSec=6.333554253582397, CurrSamplesPerSec=5.711769171508369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:17:02,479] [INFO] [logging.py:68:log_dist] [Rank 0] step=3080, skipped=5, lr=[4.2800000000000005e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:17:02,481] [INFO] [timer.py:197:stop] 0/6160, RunningAvgSamplesPerSec=6.333556008679909, CurrSamplesPerSec=5.691028244421208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:17:13,718] [INFO] [timer.py:197:stop] 0/6162, RunningAvgSamplesPerSec=6.333570286672101, CurrSamplesPerSec=5.745796062654638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:17:25,031] [INFO] [timer.py:197:stop] 0/6164, RunningAvgSamplesPerSec=6.3335685820454595, CurrSamplesPerSec=5.7004892217010115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:17:36,288] [INFO] [timer.py:197:stop] 0/6166, RunningAvgSamplesPerSec=6.333575165962192, CurrSamplesPerSec=5.723583670512504, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:17:47,560] [INFO] [timer.py:197:stop] 0/6168, RunningAvgSamplesPerSec=6.333579138734967, CurrSamplesPerSec=5.716975368116917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:17:58,863] [INFO] [timer.py:197:stop] 0/6170, RunningAvgSamplesPerSec=6.333576663658981, CurrSamplesPerSec=5.682725627333229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:18:10,131] [INFO] [timer.py:197:stop] 0/6172, RunningAvgSamplesPerSec=6.333581345778832, CurrSamplesPerSec=5.702568259797126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:18:21,401] [INFO] [timer.py:197:stop] 0/6174, RunningAvgSamplesPerSec=6.33358959330988, CurrSamplesPerSec=5.712314672746968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:18:32,668] [INFO] [timer.py:197:stop] 0/6176, RunningAvgSamplesPerSec=6.33359884866726, CurrSamplesPerSec=5.723382802499994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:18:43,931] [INFO] [timer.py:197:stop] 0/6178, RunningAvgSamplesPerSec=6.333606222769196, CurrSamplesPerSec=5.726166922511175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:18:55,195] [INFO] [logging.py:68:log_dist] [Rank 0] step=3090, skipped=5, lr=[4.257777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:18:55,196] [INFO] [timer.py:197:stop] 0/6180, RunningAvgSamplesPerSec=6.33361665814971, CurrSamplesPerSec=5.721152967814155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:19:06,466] [INFO] [timer.py:197:stop] 0/6182, RunningAvgSamplesPerSec=6.333620691197549, CurrSamplesPerSec=5.706928405749664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:19:17,733] [INFO] [timer.py:197:stop] 0/6184, RunningAvgSamplesPerSec=6.3336270853545695, CurrSamplesPerSec=5.704318112706603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:19:28,980] [INFO] [timer.py:197:stop] 0/6186, RunningAvgSamplesPerSec=6.333636018909267, CurrSamplesPerSec=5.719854901930869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:19:40,211] [INFO] [timer.py:197:stop] 0/6188, RunningAvgSamplesPerSec=6.3336476936987385, CurrSamplesPerSec=5.715806499597796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:19:51,493] [INFO] [timer.py:197:stop] 0/6190, RunningAvgSamplesPerSec=6.333649560930875, CurrSamplesPerSec=5.679310617680972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:20:02,757] [INFO] [timer.py:197:stop] 0/6192, RunningAvgSamplesPerSec=6.3336588565995875, CurrSamplesPerSec=5.72637311659954, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:20:14,044] [INFO] [timer.py:197:stop] 0/6194, RunningAvgSamplesPerSec=6.333664712617201, CurrSamplesPerSec=5.721329778563483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:20:25,347] [INFO] [timer.py:197:stop] 0/6196, RunningAvgSamplesPerSec=6.333666722865129, CurrSamplesPerSec=5.703944300478775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:20:36,642] [INFO] [timer.py:197:stop] 0/6198, RunningAvgSamplesPerSec=6.333670839094798, CurrSamplesPerSec=5.695486278300802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:20:47,924] [INFO] [logging.py:68:log_dist] [Rank 0] step=3100, skipped=5, lr=[4.235555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:20:47,925] [INFO] [timer.py:197:stop] 0/6200, RunningAvgSamplesPerSec=6.333670497347703, CurrSamplesPerSec=5.6847792055710595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 4.235555555555556e-06, 'epoch': 13.14} +[2022-12-17 07:20:59,185] [INFO] [timer.py:197:stop] 0/6202, RunningAvgSamplesPerSec=6.333676638307629, CurrSamplesPerSec=5.711012351730619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:21:10,462] [INFO] [timer.py:197:stop] 0/6204, RunningAvgSamplesPerSec=6.333679227024554, CurrSamplesPerSec=5.718343997147511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:21:21,830] [INFO] [timer.py:197:stop] 0/6206, RunningAvgSamplesPerSec=6.333662934242267, CurrSamplesPerSec=5.706011547748035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:21:33,171] [INFO] [timer.py:197:stop] 0/6208, RunningAvgSamplesPerSec=6.333652218039571, CurrSamplesPerSec=5.6182565704722425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:21:44,461] [INFO] [timer.py:197:stop] 0/6210, RunningAvgSamplesPerSec=6.333653845696366, CurrSamplesPerSec=5.702601695675352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:21:55,746] [INFO] [timer.py:197:stop] 0/6212, RunningAvgSamplesPerSec=6.333656373763029, CurrSamplesPerSec=5.700723594705585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:22:07,010] [INFO] [timer.py:197:stop] 0/6214, RunningAvgSamplesPerSec=6.333661056272008, CurrSamplesPerSec=5.720521903529705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:22:18,295] [INFO] [timer.py:197:stop] 0/6216, RunningAvgSamplesPerSec=6.333658649564784, CurrSamplesPerSec=5.707987562653582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:22:29,582] [INFO] [timer.py:197:stop] 0/6218, RunningAvgSamplesPerSec=6.333662760426689, CurrSamplesPerSec=5.727622080813498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:22:40,929] [INFO] [logging.py:68:log_dist] [Rank 0] step=3110, skipped=5, lr=[4.213333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:22:40,930] [INFO] [timer.py:197:stop] 0/6220, RunningAvgSamplesPerSec=6.333655494934498, CurrSamplesPerSec=5.64942046159434, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:22:52,198] [INFO] [timer.py:197:stop] 0/6222, RunningAvgSamplesPerSec=6.333661744484601, CurrSamplesPerSec=5.711456600556849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:23:03,481] [INFO] [timer.py:197:stop] 0/6224, RunningAvgSamplesPerSec=6.333668847335543, CurrSamplesPerSec=5.705415832894714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:23:14,768] [INFO] [timer.py:197:stop] 0/6226, RunningAvgSamplesPerSec=6.33367446236183, CurrSamplesPerSec=5.7123348514844405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:23:26,045] [INFO] [timer.py:197:stop] 0/6228, RunningAvgSamplesPerSec=6.333682561017407, CurrSamplesPerSec=5.717454398993657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:23:37,302] [INFO] [timer.py:197:stop] 0/6230, RunningAvgSamplesPerSec=6.333695018702657, CurrSamplesPerSec=5.726601804187436, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:23:48,593] [INFO] [timer.py:197:stop] 0/6232, RunningAvgSamplesPerSec=6.333700599672269, CurrSamplesPerSec=5.713283656182034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:23:59,782] [INFO] [timer.py:197:stop] 0/6234, RunningAvgSamplesPerSec=6.333720792746628, CurrSamplesPerSec=5.753932606427263, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:24:11,040] [INFO] [timer.py:197:stop] 0/6236, RunningAvgSamplesPerSec=6.3337316048765695, CurrSamplesPerSec=5.736263165164779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:24:22,293] [INFO] [timer.py:197:stop] 0/6238, RunningAvgSamplesPerSec=6.333743592965634, CurrSamplesPerSec=5.7273688716713105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:24:33,556] [INFO] [logging.py:68:log_dist] [Rank 0] step=3120, skipped=5, lr=[4.191111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:24:33,558] [INFO] [timer.py:197:stop] 0/6240, RunningAvgSamplesPerSec=6.333753619937605, CurrSamplesPerSec=5.723275174287382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:24:44,838] [INFO] [timer.py:197:stop] 0/6242, RunningAvgSamplesPerSec=6.333759832425309, CurrSamplesPerSec=5.707526620904189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:24:56,106] [INFO] [timer.py:197:stop] 0/6244, RunningAvgSamplesPerSec=6.333768310071723, CurrSamplesPerSec=5.713693475802618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:25:07,395] [INFO] [timer.py:197:stop] 0/6246, RunningAvgSamplesPerSec=6.333772564543033, CurrSamplesPerSec=5.690272809574608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:25:18,669] [INFO] [timer.py:197:stop] 0/6248, RunningAvgSamplesPerSec=6.333780481683763, CurrSamplesPerSec=5.726913347117072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:25:29,909] [INFO] [timer.py:197:stop] 0/6250, RunningAvgSamplesPerSec=6.333792384979605, CurrSamplesPerSec=5.730079078308308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 4.18e-06, 'epoch': 13.24} +[2022-12-17 07:25:41,187] [INFO] [timer.py:197:stop] 0/6252, RunningAvgSamplesPerSec=6.3337997647527295, CurrSamplesPerSec=5.725025061804615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:25:52,455] [INFO] [timer.py:197:stop] 0/6254, RunningAvgSamplesPerSec=6.333808840927152, CurrSamplesPerSec=5.7160613651848236, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:26:03,721] [INFO] [timer.py:197:stop] 0/6256, RunningAvgSamplesPerSec=6.333819140364993, CurrSamplesPerSec=5.741936852855393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:26:15,002] [INFO] [timer.py:197:stop] 0/6258, RunningAvgSamplesPerSec=6.3338262787236195, CurrSamplesPerSec=5.726260978399147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:26:26,272] [INFO] [logging.py:68:log_dist] [Rank 0] step=3130, skipped=5, lr=[4.168888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:26:26,274] [INFO] [timer.py:197:stop] 0/6260, RunningAvgSamplesPerSec=6.333834613812638, CurrSamplesPerSec=5.731404795279654, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:26:37,516] [INFO] [timer.py:197:stop] 0/6262, RunningAvgSamplesPerSec=6.333851044396006, CurrSamplesPerSec=5.754929086728061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:26:48,838] [INFO] [timer.py:197:stop] 0/6264, RunningAvgSamplesPerSec=6.333848800608768, CurrSamplesPerSec=5.688091595166303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:27:00,106] [INFO] [timer.py:197:stop] 0/6266, RunningAvgSamplesPerSec=6.333856453734544, CurrSamplesPerSec=5.708961877763876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:27:11,364] [INFO] [timer.py:197:stop] 0/6268, RunningAvgSamplesPerSec=6.333866410344925, CurrSamplesPerSec=5.719594092005034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:27:22,619] [INFO] [timer.py:197:stop] 0/6270, RunningAvgSamplesPerSec=6.333876826821105, CurrSamplesPerSec=5.723777473914621, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:27:33,875] [INFO] [timer.py:197:stop] 0/6272, RunningAvgSamplesPerSec=6.3338848330656905, CurrSamplesPerSec=5.716193797156096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:27:45,120] [INFO] [timer.py:197:stop] 0/6274, RunningAvgSamplesPerSec=6.333897185918975, CurrSamplesPerSec=5.74382648026717, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:27:56,403] [INFO] [timer.py:197:stop] 0/6276, RunningAvgSamplesPerSec=6.333902031352192, CurrSamplesPerSec=5.709184319767375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:28:07,662] [INFO] [timer.py:197:stop] 0/6278, RunningAvgSamplesPerSec=6.333911149775862, CurrSamplesPerSec=5.7330222760859595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:28:18,945] [INFO] [logging.py:68:log_dist] [Rank 0] step=3140, skipped=5, lr=[4.146666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:28:18,947] [INFO] [timer.py:197:stop] 0/6280, RunningAvgSamplesPerSec=6.33391300068281, CurrSamplesPerSec=5.699917897200058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:28:30,221] [INFO] [timer.py:197:stop] 0/6282, RunningAvgSamplesPerSec=6.333919192214186, CurrSamplesPerSec=5.7230972671385505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:28:41,446] [INFO] [timer.py:197:stop] 0/6284, RunningAvgSamplesPerSec=6.333935240619337, CurrSamplesPerSec=5.736468370782495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:28:52,726] [INFO] [timer.py:197:stop] 0/6286, RunningAvgSamplesPerSec=6.333940726797692, CurrSamplesPerSec=5.702230046434748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:29:04,007] [INFO] [timer.py:197:stop] 0/6288, RunningAvgSamplesPerSec=6.333946524808699, CurrSamplesPerSec=5.7043353257336005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:29:15,255] [INFO] [timer.py:197:stop] 0/6290, RunningAvgSamplesPerSec=6.3339564607143695, CurrSamplesPerSec=5.712767148317302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:29:26,466] [INFO] [timer.py:197:stop] 0/6292, RunningAvgSamplesPerSec=6.333969665950145, CurrSamplesPerSec=5.738719976979741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:29:37,700] [INFO] [timer.py:197:stop] 0/6294, RunningAvgSamplesPerSec=6.333981120917072, CurrSamplesPerSec=5.711757018032768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:29:48,954] [INFO] [timer.py:197:stop] 0/6296, RunningAvgSamplesPerSec=6.333993811173233, CurrSamplesPerSec=5.731998850847594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:30:00,190] [INFO] [timer.py:197:stop] 0/6298, RunningAvgSamplesPerSec=6.334008001142226, CurrSamplesPerSec=5.7244090125781755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:30:11,437] [INFO] [logging.py:68:log_dist] [Rank 0] step=3150, skipped=5, lr=[4.124444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:30:11,439] [INFO] [timer.py:197:stop] 0/6300, RunningAvgSamplesPerSec=6.334020339685583, CurrSamplesPerSec=5.74459939878852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 4.124444444444445e-06, 'epoch': 13.35} +[2022-12-17 07:30:22,694] [INFO] [timer.py:197:stop] 0/6302, RunningAvgSamplesPerSec=6.334030713722951, CurrSamplesPerSec=5.7419351333448185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:30:33,920] [INFO] [timer.py:197:stop] 0/6304, RunningAvgSamplesPerSec=6.334044649015982, CurrSamplesPerSec=5.722702201516716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:30:45,181] [INFO] [timer.py:197:stop] 0/6306, RunningAvgSamplesPerSec=6.334050139124009, CurrSamplesPerSec=5.702820249985925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:30:56,440] [INFO] [timer.py:197:stop] 0/6308, RunningAvgSamplesPerSec=6.3340591029131605, CurrSamplesPerSec=5.72185320243758, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:31:07,721] [INFO] [timer.py:197:stop] 0/6310, RunningAvgSamplesPerSec=6.334064661655033, CurrSamplesPerSec=5.72226888775065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:31:19,006] [INFO] [timer.py:197:stop] 0/6312, RunningAvgSamplesPerSec=6.334071341269889, CurrSamplesPerSec=5.709138664317977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:31:30,271] [INFO] [timer.py:197:stop] 0/6314, RunningAvgSamplesPerSec=6.334080005450078, CurrSamplesPerSec=5.70330321265174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:31:41,558] [INFO] [timer.py:197:stop] 0/6316, RunningAvgSamplesPerSec=6.334084927353801, CurrSamplesPerSec=5.725893078676519, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:31:52,823] [INFO] [timer.py:197:stop] 0/6318, RunningAvgSamplesPerSec=6.334092224371551, CurrSamplesPerSec=5.735803528437858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:32:04,092] [INFO] [logging.py:68:log_dist] [Rank 0] step=3160, skipped=5, lr=[4.102222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:32:04,094] [INFO] [timer.py:197:stop] 0/6320, RunningAvgSamplesPerSec=6.3340985988673655, CurrSamplesPerSec=5.723873648233309, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:32:15,331] [INFO] [timer.py:197:stop] 0/6322, RunningAvgSamplesPerSec=6.334108187268311, CurrSamplesPerSec=5.730364332160457, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:32:26,579] [INFO] [timer.py:197:stop] 0/6324, RunningAvgSamplesPerSec=6.334119480956959, CurrSamplesPerSec=5.7376999128728645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:32:37,841] [INFO] [timer.py:197:stop] 0/6326, RunningAvgSamplesPerSec=6.334127634300717, CurrSamplesPerSec=5.735759897419527, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:32:49,119] [INFO] [timer.py:197:stop] 0/6328, RunningAvgSamplesPerSec=6.3341327590317595, CurrSamplesPerSec=5.711272865637329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:33:00,383] [INFO] [timer.py:197:stop] 0/6330, RunningAvgSamplesPerSec=6.334137526524288, CurrSamplesPerSec=5.701190703342258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:33:11,682] [INFO] [timer.py:197:stop] 0/6332, RunningAvgSamplesPerSec=6.334138129547782, CurrSamplesPerSec=5.707132003239794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:33:22,984] [INFO] [timer.py:197:stop] 0/6334, RunningAvgSamplesPerSec=6.334138679363636, CurrSamplesPerSec=5.695152529306642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:33:34,221] [INFO] [timer.py:197:stop] 0/6336, RunningAvgSamplesPerSec=6.334148557977573, CurrSamplesPerSec=5.72934625780531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:33:45,505] [INFO] [timer.py:197:stop] 0/6338, RunningAvgSamplesPerSec=6.33415256108004, CurrSamplesPerSec=5.696907263841009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:33:56,784] [INFO] [logging.py:68:log_dist] [Rank 0] step=3170, skipped=5, lr=[4.08e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:33:56,786] [INFO] [timer.py:197:stop] 0/6340, RunningAvgSamplesPerSec=6.334156801115452, CurrSamplesPerSec=5.69057727563347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:34:08,066] [INFO] [timer.py:197:stop] 0/6342, RunningAvgSamplesPerSec=6.3341588107996945, CurrSamplesPerSec=5.689759247721567, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:34:19,371] [INFO] [timer.py:197:stop] 0/6344, RunningAvgSamplesPerSec=6.3341594637141885, CurrSamplesPerSec=5.699542483160544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:34:30,652] [INFO] [timer.py:197:stop] 0/6346, RunningAvgSamplesPerSec=6.334164233327315, CurrSamplesPerSec=5.720801817491398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:34:41,916] [INFO] [timer.py:197:stop] 0/6348, RunningAvgSamplesPerSec=6.334172213651666, CurrSamplesPerSec=5.717038438768021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:34:53,200] [INFO] [timer.py:197:stop] 0/6350, RunningAvgSamplesPerSec=6.334176607542711, CurrSamplesPerSec=5.696459231174649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 4.0688888888888896e-06, 'epoch': 13.45} +[2022-12-17 07:35:04,420] [INFO] [timer.py:197:stop] 0/6352, RunningAvgSamplesPerSec=6.334190474942335, CurrSamplesPerSec=5.750339362309045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:35:15,687] [INFO] [timer.py:197:stop] 0/6354, RunningAvgSamplesPerSec=6.334197592289525, CurrSamplesPerSec=5.708686763325248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:35:26,965] [INFO] [timer.py:197:stop] 0/6356, RunningAvgSamplesPerSec=6.334202876382074, CurrSamplesPerSec=5.696194506733333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:35:38,242] [INFO] [timer.py:197:stop] 0/6358, RunningAvgSamplesPerSec=6.334208646005777, CurrSamplesPerSec=5.726033539458956, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:35:49,490] [INFO] [logging.py:68:log_dist] [Rank 0] step=3180, skipped=5, lr=[4.057777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:35:49,491] [INFO] [timer.py:197:stop] 0/6360, RunningAvgSamplesPerSec=6.334213859842294, CurrSamplesPerSec=5.705084314216812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:36:00,801] [INFO] [timer.py:197:stop] 0/6362, RunningAvgSamplesPerSec=6.334213061028189, CurrSamplesPerSec=5.685610734607999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:36:12,079] [INFO] [timer.py:197:stop] 0/6364, RunningAvgSamplesPerSec=6.334218711653675, CurrSamplesPerSec=5.71221475362965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:36:23,314] [INFO] [timer.py:197:stop] 0/6366, RunningAvgSamplesPerSec=6.334226486091336, CurrSamplesPerSec=5.7237674661127995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:36:34,618] [INFO] [timer.py:197:stop] 0/6368, RunningAvgSamplesPerSec=6.334227102289113, CurrSamplesPerSec=5.70363379722427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:36:45,904] [INFO] [timer.py:197:stop] 0/6370, RunningAvgSamplesPerSec=6.33423109422309, CurrSamplesPerSec=5.699825672921633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:36:57,178] [INFO] [timer.py:197:stop] 0/6372, RunningAvgSamplesPerSec=6.334237409680017, CurrSamplesPerSec=5.714431056001904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:37:08,435] [INFO] [timer.py:197:stop] 0/6374, RunningAvgSamplesPerSec=6.3342470259604955, CurrSamplesPerSec=5.737647422993051, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:37:19,668] [INFO] [timer.py:197:stop] 0/6376, RunningAvgSamplesPerSec=6.334258378697692, CurrSamplesPerSec=5.740801708702291, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:37:30,986] [INFO] [timer.py:197:stop] 0/6378, RunningAvgSamplesPerSec=6.334256167928714, CurrSamplesPerSec=5.688993057462939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:37:42,214] [INFO] [logging.py:68:log_dist] [Rank 0] step=3190, skipped=5, lr=[4.035555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:37:42,215] [INFO] [timer.py:197:stop] 0/6380, RunningAvgSamplesPerSec=6.33426873374771, CurrSamplesPerSec=5.7413142117213685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:37:53,484] [INFO] [timer.py:197:stop] 0/6382, RunningAvgSamplesPerSec=6.334276375997644, CurrSamplesPerSec=5.715985414323117, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:38:04,779] [INFO] [timer.py:197:stop] 0/6384, RunningAvgSamplesPerSec=6.3342791658799245, CurrSamplesPerSec=5.701290963695899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:38:16,060] [INFO] [timer.py:197:stop] 0/6386, RunningAvgSamplesPerSec=6.334284560101654, CurrSamplesPerSec=5.7080103810826985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:38:27,421] [INFO] [timer.py:197:stop] 0/6388, RunningAvgSamplesPerSec=6.334288852463872, CurrSamplesPerSec=5.715398323846543, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:38:38,696] [INFO] [timer.py:197:stop] 0/6390, RunningAvgSamplesPerSec=6.334293701092073, CurrSamplesPerSec=5.723066274843333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:38:49,974] [INFO] [timer.py:197:stop] 0/6392, RunningAvgSamplesPerSec=6.334300172161752, CurrSamplesPerSec=5.714413052107243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:39:01,254] [INFO] [timer.py:197:stop] 0/6394, RunningAvgSamplesPerSec=6.33430533183457, CurrSamplesPerSec=5.714190689032285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:39:12,552] [INFO] [timer.py:197:stop] 0/6396, RunningAvgSamplesPerSec=6.3343093155646, CurrSamplesPerSec=5.701755500915049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:39:23,875] [INFO] [timer.py:197:stop] 0/6398, RunningAvgSamplesPerSec=6.334306969316176, CurrSamplesPerSec=5.690710218338804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:39:35,165] [INFO] [logging.py:68:log_dist] [Rank 0] step=3200, skipped=5, lr=[4.013333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:39:35,166] [INFO] [timer.py:197:stop] 0/6400, RunningAvgSamplesPerSec=6.33431126623047, CurrSamplesPerSec=5.694712746053772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 4.013333333333334e-06, 'epoch': 13.56} +[2022-12-17 07:39:46,431] [INFO] [timer.py:197:stop] 0/6402, RunningAvgSamplesPerSec=6.3343197508186675, CurrSamplesPerSec=5.718162254982803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:39:57,734] [INFO] [timer.py:197:stop] 0/6404, RunningAvgSamplesPerSec=6.334320762396656, CurrSamplesPerSec=5.687556494617642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:40:09,051] [INFO] [timer.py:197:stop] 0/6406, RunningAvgSamplesPerSec=6.334318964649175, CurrSamplesPerSec=5.701802976134947, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:40:20,325] [INFO] [timer.py:197:stop] 0/6408, RunningAvgSamplesPerSec=6.334322601846689, CurrSamplesPerSec=5.715647311422073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:40:31,575] [INFO] [timer.py:197:stop] 0/6410, RunningAvgSamplesPerSec=6.334331751212934, CurrSamplesPerSec=5.721118338574796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:40:42,836] [INFO] [timer.py:197:stop] 0/6412, RunningAvgSamplesPerSec=6.334342953194776, CurrSamplesPerSec=5.733783716239972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:40:54,089] [INFO] [timer.py:197:stop] 0/6414, RunningAvgSamplesPerSec=6.334353856926857, CurrSamplesPerSec=5.734053660919016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:41:05,379] [INFO] [timer.py:197:stop] 0/6416, RunningAvgSamplesPerSec=6.334357538593181, CurrSamplesPerSec=5.709000245343994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:41:16,633] [INFO] [timer.py:197:stop] 0/6418, RunningAvgSamplesPerSec=6.334366775817005, CurrSamplesPerSec=5.733293618503807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:41:27,882] [INFO] [logging.py:68:log_dist] [Rank 0] step=3210, skipped=5, lr=[3.991111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:41:27,884] [INFO] [timer.py:197:stop] 0/6420, RunningAvgSamplesPerSec=6.334374605304082, CurrSamplesPerSec=5.72185320243758, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:41:39,158] [INFO] [timer.py:197:stop] 0/6422, RunningAvgSamplesPerSec=6.334383346014213, CurrSamplesPerSec=5.7169899789547145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:41:50,454] [INFO] [timer.py:197:stop] 0/6424, RunningAvgSamplesPerSec=6.334385831502931, CurrSamplesPerSec=5.7094944561622984, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:42:01,719] [INFO] [timer.py:197:stop] 0/6426, RunningAvgSamplesPerSec=6.33439479212694, CurrSamplesPerSec=5.7225472647897115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:42:12,981] [INFO] [timer.py:197:stop] 0/6428, RunningAvgSamplesPerSec=6.3344037562703885, CurrSamplesPerSec=5.7429112440335315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:42:24,242] [INFO] [timer.py:197:stop] 0/6430, RunningAvgSamplesPerSec=6.334409786537378, CurrSamplesPerSec=5.708665153517289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:42:35,613] [INFO] [timer.py:197:stop] 0/6432, RunningAvgSamplesPerSec=6.334397375003188, CurrSamplesPerSec=5.613414707336939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:42:46,882] [INFO] [timer.py:197:stop] 0/6434, RunningAvgSamplesPerSec=6.3344080132693055, CurrSamplesPerSec=5.7186039621197216, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:42:58,199] [INFO] [timer.py:197:stop] 0/6436, RunningAvgSamplesPerSec=6.334413883868553, CurrSamplesPerSec=5.711313208535354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:43:09,484] [INFO] [timer.py:197:stop] 0/6438, RunningAvgSamplesPerSec=6.334418881235654, CurrSamplesPerSec=5.697821198138527, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:43:20,807] [INFO] [logging.py:68:log_dist] [Rank 0] step=3220, skipped=5, lr=[3.96888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:43:20,808] [INFO] [timer.py:197:stop] 0/6440, RunningAvgSamplesPerSec=6.334424521474871, CurrSamplesPerSec=5.701281760907762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:43:32,071] [INFO] [timer.py:197:stop] 0/6442, RunningAvgSamplesPerSec=6.334429620798979, CurrSamplesPerSec=5.703727114321208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:43:43,375] [INFO] [timer.py:197:stop] 0/6444, RunningAvgSamplesPerSec=6.33442966271726, CurrSamplesPerSec=5.703901395205888, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:43:54,693] [INFO] [timer.py:197:stop] 0/6446, RunningAvgSamplesPerSec=6.334435735573179, CurrSamplesPerSec=5.708214784084248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:44:05,979] [INFO] [timer.py:197:stop] 0/6448, RunningAvgSamplesPerSec=6.334439283399943, CurrSamplesPerSec=5.710361899779623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:44:17,256] [INFO] [timer.py:197:stop] 0/6450, RunningAvgSamplesPerSec=6.334443402553889, CurrSamplesPerSec=5.689446186919232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.001, 'learning_rate': 3.9577777777777785e-06, 'epoch': 13.67} +[2022-12-17 07:44:28,529] [INFO] [timer.py:197:stop] 0/6452, RunningAvgSamplesPerSec=6.334449873358714, CurrSamplesPerSec=5.7057466624302915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:44:39,835] [INFO] [timer.py:197:stop] 0/6454, RunningAvgSamplesPerSec=6.334449871263617, CurrSamplesPerSec=5.665630778327832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:44:51,094] [INFO] [timer.py:197:stop] 0/6456, RunningAvgSamplesPerSec=6.33445870308113, CurrSamplesPerSec=5.711911371015564, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:45:02,323] [INFO] [timer.py:197:stop] 0/6458, RunningAvgSamplesPerSec=6.33447117020378, CurrSamplesPerSec=5.742178085831228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:45:13,566] [INFO] [logging.py:68:log_dist] [Rank 0] step=3230, skipped=5, lr=[3.946666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:45:13,567] [INFO] [timer.py:197:stop] 0/6460, RunningAvgSamplesPerSec=6.334483320833476, CurrSamplesPerSec=5.7602422312557096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:45:24,855] [INFO] [timer.py:197:stop] 0/6462, RunningAvgSamplesPerSec=6.334487090458282, CurrSamplesPerSec=5.703172588988866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:45:36,144] [INFO] [timer.py:197:stop] 0/6464, RunningAvgSamplesPerSec=6.334490368647347, CurrSamplesPerSec=5.711960717114719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:45:47,420] [INFO] [timer.py:197:stop] 0/6466, RunningAvgSamplesPerSec=6.334496714801158, CurrSamplesPerSec=5.722351836883863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:45:58,691] [INFO] [timer.py:197:stop] 0/6468, RunningAvgSamplesPerSec=6.334500277246964, CurrSamplesPerSec=5.710934347992327, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:46:10,029] [INFO] [timer.py:197:stop] 0/6470, RunningAvgSamplesPerSec=6.33449421971024, CurrSamplesPerSec=5.6767440094893145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:46:21,319] [INFO] [timer.py:197:stop] 0/6472, RunningAvgSamplesPerSec=6.334497096531263, CurrSamplesPerSec=5.714800890829148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:46:32,622] [INFO] [timer.py:197:stop] 0/6474, RunningAvgSamplesPerSec=6.3345016732355335, CurrSamplesPerSec=5.739231617535441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:46:43,911] [INFO] [timer.py:197:stop] 0/6476, RunningAvgSamplesPerSec=6.3345069214220695, CurrSamplesPerSec=5.722443327687395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:46:55,203] [INFO] [timer.py:197:stop] 0/6478, RunningAvgSamplesPerSec=6.334509599437047, CurrSamplesPerSec=5.711947833568165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:47:06,498] [INFO] [logging.py:68:log_dist] [Rank 0] step=3240, skipped=5, lr=[3.924444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:47:06,499] [INFO] [timer.py:197:stop] 0/6480, RunningAvgSamplesPerSec=6.334507862901814, CurrSamplesPerSec=5.703892911188816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:47:17,846] [INFO] [timer.py:197:stop] 0/6482, RunningAvgSamplesPerSec=6.334500014738551, CurrSamplesPerSec=5.675805381111495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:47:29,169] [INFO] [timer.py:197:stop] 0/6484, RunningAvgSamplesPerSec=6.33449821623731, CurrSamplesPerSec=5.6873121173197685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:47:40,463] [INFO] [timer.py:197:stop] 0/6486, RunningAvgSamplesPerSec=6.334501226706766, CurrSamplesPerSec=5.689940877367955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:47:51,756] [INFO] [timer.py:197:stop] 0/6488, RunningAvgSamplesPerSec=6.334501235383202, CurrSamplesPerSec=5.696078470899154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:48:03,015] [INFO] [timer.py:197:stop] 0/6490, RunningAvgSamplesPerSec=6.334510794702494, CurrSamplesPerSec=5.733341375468196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:48:14,308] [INFO] [timer.py:197:stop] 0/6492, RunningAvgSamplesPerSec=6.334513328568358, CurrSamplesPerSec=5.700342748359202, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:48:25,572] [INFO] [timer.py:197:stop] 0/6494, RunningAvgSamplesPerSec=6.334521013057932, CurrSamplesPerSec=5.73964688643047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:48:36,884] [INFO] [timer.py:197:stop] 0/6496, RunningAvgSamplesPerSec=6.3345225257764195, CurrSamplesPerSec=5.704793084901866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:48:48,174] [INFO] [timer.py:197:stop] 0/6498, RunningAvgSamplesPerSec=6.334527174656914, CurrSamplesPerSec=5.710437458312825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:48:59,439] [INFO] [logging.py:68:log_dist] [Rank 0] step=3250, skipped=5, lr=[3.9022222222222225e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:48:59,441] [INFO] [timer.py:197:stop] 0/6500, RunningAvgSamplesPerSec=6.334536038481936, CurrSamplesPerSec=5.748160360542781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.9022222222222225e-06, 'epoch': 13.77} +[2022-12-17 07:49:10,677] [INFO] [timer.py:197:stop] 0/6502, RunningAvgSamplesPerSec=6.334544401376434, CurrSamplesPerSec=5.718898065329056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:49:21,950] [INFO] [timer.py:197:stop] 0/6504, RunningAvgSamplesPerSec=6.334551849421232, CurrSamplesPerSec=5.719225098781024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:49:33,208] [INFO] [timer.py:197:stop] 0/6506, RunningAvgSamplesPerSec=6.334561672364955, CurrSamplesPerSec=5.728841513197815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:49:44,451] [INFO] [timer.py:197:stop] 0/6508, RunningAvgSamplesPerSec=6.334570523742911, CurrSamplesPerSec=5.724698585593349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:49:55,715] [INFO] [timer.py:197:stop] 0/6510, RunningAvgSamplesPerSec=6.3345792677214945, CurrSamplesPerSec=5.7236586030173005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:50:06,968] [INFO] [timer.py:197:stop] 0/6512, RunningAvgSamplesPerSec=6.334590058357528, CurrSamplesPerSec=5.7514309615628045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:50:18,211] [INFO] [timer.py:197:stop] 0/6514, RunningAvgSamplesPerSec=6.334603074445066, CurrSamplesPerSec=5.724986722728296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:50:29,481] [INFO] [timer.py:197:stop] 0/6516, RunningAvgSamplesPerSec=6.334610934163331, CurrSamplesPerSec=5.715269822617539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:50:40,752] [INFO] [timer.py:197:stop] 0/6518, RunningAvgSamplesPerSec=6.33461945488174, CurrSamplesPerSec=5.720193746608345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:50:52,044] [INFO] [logging.py:68:log_dist] [Rank 0] step=3260, skipped=5, lr=[3.88e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:50:52,046] [INFO] [timer.py:197:stop] 0/6520, RunningAvgSamplesPerSec=6.334621780396799, CurrSamplesPerSec=5.6912591854839985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:51:03,283] [INFO] [timer.py:197:stop] 0/6522, RunningAvgSamplesPerSec=6.334632276362033, CurrSamplesPerSec=5.730913880609149, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:51:14,584] [INFO] [timer.py:197:stop] 0/6524, RunningAvgSamplesPerSec=6.334633506617112, CurrSamplesPerSec=5.701871525934077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:51:25,919] [INFO] [timer.py:197:stop] 0/6526, RunningAvgSamplesPerSec=6.334625725853474, CurrSamplesPerSec=5.659491053787917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:51:37,204] [INFO] [timer.py:197:stop] 0/6528, RunningAvgSamplesPerSec=6.334630479939639, CurrSamplesPerSec=5.708790201300999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:51:48,768] [INFO] [timer.py:197:stop] 0/6530, RunningAvgSamplesPerSec=6.334633340870514, CurrSamplesPerSec=5.693425200013472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:52:00,152] [INFO] [timer.py:197:stop] 0/6532, RunningAvgSamplesPerSec=6.334636467388313, CurrSamplesPerSec=5.699997536839119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:52:11,436] [INFO] [timer.py:197:stop] 0/6534, RunningAvgSamplesPerSec=6.334641379479118, CurrSamplesPerSec=5.7103485375461345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:52:22,712] [INFO] [timer.py:197:stop] 0/6536, RunningAvgSamplesPerSec=6.334648530792128, CurrSamplesPerSec=5.719040620022333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:52:33,970] [INFO] [timer.py:197:stop] 0/6538, RunningAvgSamplesPerSec=6.334656893317896, CurrSamplesPerSec=5.719474176171054, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:52:45,257] [INFO] [logging.py:68:log_dist] [Rank 0] step=3270, skipped=5, lr=[3.857777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:52:45,258] [INFO] [timer.py:197:stop] 0/6540, RunningAvgSamplesPerSec=6.334657147519695, CurrSamplesPerSec=5.688442357582676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:52:56,557] [INFO] [timer.py:197:stop] 0/6542, RunningAvgSamplesPerSec=6.334659911914299, CurrSamplesPerSec=5.699580966270709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:53:07,861] [INFO] [timer.py:197:stop] 0/6544, RunningAvgSamplesPerSec=6.334661752510299, CurrSamplesPerSec=5.7100145020316795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:53:19,167] [INFO] [timer.py:197:stop] 0/6546, RunningAvgSamplesPerSec=6.334660035834661, CurrSamplesPerSec=5.6919149478993525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:53:30,477] [INFO] [timer.py:197:stop] 0/6548, RunningAvgSamplesPerSec=6.334660203676872, CurrSamplesPerSec=5.688878761592103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:53:41,778] [INFO] [timer.py:197:stop] 0/6550, RunningAvgSamplesPerSec=6.334663627328773, CurrSamplesPerSec=5.694641468901246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 3.8466666666666665e-06, 'epoch': 13.88} +[2022-12-17 07:53:53,070] [INFO] [timer.py:197:stop] 0/6552, RunningAvgSamplesPerSec=6.334667550444578, CurrSamplesPerSec=5.712499933497748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:54:04,382] [INFO] [timer.py:197:stop] 0/6554, RunningAvgSamplesPerSec=6.334666729151781, CurrSamplesPerSec=5.688070140999738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:54:15,653] [INFO] [timer.py:197:stop] 0/6556, RunningAvgSamplesPerSec=6.334670797917401, CurrSamplesPerSec=5.713245960593941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:54:26,951] [INFO] [timer.py:197:stop] 0/6558, RunningAvgSamplesPerSec=6.334672746645493, CurrSamplesPerSec=5.701442329605782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:54:38,234] [INFO] [logging.py:68:log_dist] [Rank 0] step=3280, skipped=5, lr=[3.835555555555555e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:54:38,236] [INFO] [timer.py:197:stop] 0/6560, RunningAvgSamplesPerSec=6.334678289214605, CurrSamplesPerSec=5.713492571716677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:54:49,540] [INFO] [timer.py:197:stop] 0/6562, RunningAvgSamplesPerSec=6.334680555512083, CurrSamplesPerSec=5.716451618907167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:55:00,820] [INFO] [timer.py:197:stop] 0/6564, RunningAvgSamplesPerSec=6.334688242733154, CurrSamplesPerSec=5.713890258754805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:55:12,112] [INFO] [timer.py:197:stop] 0/6566, RunningAvgSamplesPerSec=6.334691767360219, CurrSamplesPerSec=5.691641956668833, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:55:23,390] [INFO] [timer.py:197:stop] 0/6568, RunningAvgSamplesPerSec=6.334699487992292, CurrSamplesPerSec=5.71009806797846, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:55:34,687] [INFO] [timer.py:197:stop] 0/6570, RunningAvgSamplesPerSec=6.334702403865535, CurrSamplesPerSec=5.699421712497568, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:55:45,980] [INFO] [timer.py:197:stop] 0/6572, RunningAvgSamplesPerSec=6.334709420601028, CurrSamplesPerSec=5.705565720115627, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:55:57,281] [INFO] [timer.py:197:stop] 0/6574, RunningAvgSamplesPerSec=6.334713597124701, CurrSamplesPerSec=5.721482942087314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:56:08,612] [INFO] [timer.py:197:stop] 0/6576, RunningAvgSamplesPerSec=6.3347120968318675, CurrSamplesPerSec=5.698075429921804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:56:19,883] [INFO] [timer.py:197:stop] 0/6578, RunningAvgSamplesPerSec=6.334716724978661, CurrSamplesPerSec=5.717221327172613, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:56:31,166] [INFO] [logging.py:68:log_dist] [Rank 0] step=3290, skipped=5, lr=[3.813333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:56:31,168] [INFO] [timer.py:197:stop] 0/6580, RunningAvgSamplesPerSec=6.334719108089491, CurrSamplesPerSec=5.703603015371135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:56:42,501] [INFO] [timer.py:197:stop] 0/6582, RunningAvgSamplesPerSec=6.334714513118618, CurrSamplesPerSec=5.671661365575104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:56:53,787] [INFO] [timer.py:197:stop] 0/6584, RunningAvgSamplesPerSec=6.3347188763533175, CurrSamplesPerSec=5.7126526246192935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:57:05,046] [INFO] [timer.py:197:stop] 0/6586, RunningAvgSamplesPerSec=6.334725509508845, CurrSamplesPerSec=5.709292875815929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:57:16,350] [INFO] [timer.py:197:stop] 0/6588, RunningAvgSamplesPerSec=6.334726599341239, CurrSamplesPerSec=5.693161964928307, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:57:27,660] [INFO] [timer.py:197:stop] 0/6590, RunningAvgSamplesPerSec=6.334723566293667, CurrSamplesPerSec=5.688183440113247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:57:38,951] [INFO] [timer.py:197:stop] 0/6592, RunningAvgSamplesPerSec=6.334723994163866, CurrSamplesPerSec=5.709301861624632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:57:50,223] [INFO] [timer.py:197:stop] 0/6594, RunningAvgSamplesPerSec=6.334728347281103, CurrSamplesPerSec=5.715075621498381, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:58:01,518] [INFO] [timer.py:197:stop] 0/6596, RunningAvgSamplesPerSec=6.334731330666477, CurrSamplesPerSec=5.703867701687004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:58:12,859] [INFO] [timer.py:197:stop] 0/6598, RunningAvgSamplesPerSec=6.334727303189047, CurrSamplesPerSec=5.682588726750454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:58:24,162] [INFO] [logging.py:68:log_dist] [Rank 0] step=3300, skipped=5, lr=[3.7911111111111114e-06], mom=[[0.9, 0.999]] +[2022-12-17 07:58:24,163] [INFO] [timer.py:197:stop] 0/6600, RunningAvgSamplesPerSec=6.334726599953564, CurrSamplesPerSec=5.699277714088104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.7911111111111114e-06, 'epoch': 13.98} +[2022-12-17 07:58:35,460] [INFO] [timer.py:197:stop] 0/6602, RunningAvgSamplesPerSec=6.334731408166863, CurrSamplesPerSec=5.712972378490019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:58:46,758] [INFO] [timer.py:197:stop] 0/6604, RunningAvgSamplesPerSec=6.334731845338654, CurrSamplesPerSec=5.707407938636204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:58:58,047] [INFO] [timer.py:197:stop] 0/6606, RunningAvgSamplesPerSec=6.334734233740369, CurrSamplesPerSec=5.711525382674848, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:59:06,507] [INFO] [timer.py:197:stop] 0/6608, RunningAvgSamplesPerSec=6.335210928607283, CurrSamplesPerSec=10.252419732705375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:59:17,751] [INFO] [timer.py:197:stop] 0/6610, RunningAvgSamplesPerSec=6.335219406395689, CurrSamplesPerSec=5.717978088381363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:59:29,040] [INFO] [timer.py:197:stop] 0/6612, RunningAvgSamplesPerSec=6.335223829599127, CurrSamplesPerSec=5.703502916051277, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:59:40,379] [INFO] [timer.py:197:stop] 0/6614, RunningAvgSamplesPerSec=6.335217971742134, CurrSamplesPerSec=5.668044202371438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 07:59:51,657] [INFO] [timer.py:197:stop] 0/6616, RunningAvgSamplesPerSec=6.335221810332066, CurrSamplesPerSec=5.710716629205853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:00:02,978] [INFO] [timer.py:197:stop] 0/6618, RunningAvgSamplesPerSec=6.335221454769381, CurrSamplesPerSec=5.683671119148693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:00:14,226] [INFO] [logging.py:68:log_dist] [Rank 0] step=3310, skipped=5, lr=[3.768888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:00:14,227] [INFO] [timer.py:197:stop] 0/6620, RunningAvgSamplesPerSec=6.335230178121693, CurrSamplesPerSec=5.728086274241019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:00:25,541] [INFO] [timer.py:197:stop] 0/6622, RunningAvgSamplesPerSec=6.335231508364938, CurrSamplesPerSec=5.714036699364634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:00:36,864] [INFO] [timer.py:197:stop] 0/6624, RunningAvgSamplesPerSec=6.335230673922582, CurrSamplesPerSec=5.67315585182068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:00:48,140] [INFO] [timer.py:197:stop] 0/6626, RunningAvgSamplesPerSec=6.3352321982779465, CurrSamplesPerSec=5.700755314020435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:00:59,469] [INFO] [timer.py:197:stop] 0/6628, RunningAvgSamplesPerSec=6.3352286469104575, CurrSamplesPerSec=5.684976409828763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:01:10,759] [INFO] [timer.py:197:stop] 0/6630, RunningAvgSamplesPerSec=6.3352306933118365, CurrSamplesPerSec=5.70521526797899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:01:22,014] [INFO] [timer.py:197:stop] 0/6632, RunningAvgSamplesPerSec=6.335241592226106, CurrSamplesPerSec=5.721563185487121, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:01:33,293] [INFO] [timer.py:197:stop] 0/6634, RunningAvgSamplesPerSec=6.335247727416611, CurrSamplesPerSec=5.7200123743921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:01:44,582] [INFO] [timer.py:197:stop] 0/6636, RunningAvgSamplesPerSec=6.335252259272921, CurrSamplesPerSec=5.712915476704088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:01:55,896] [INFO] [timer.py:197:stop] 0/6638, RunningAvgSamplesPerSec=6.335255563434, CurrSamplesPerSec=5.712214267413781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:02:07,209] [INFO] [logging.py:68:log_dist] [Rank 0] step=3320, skipped=5, lr=[3.7466666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:02:07,210] [INFO] [timer.py:197:stop] 0/6640, RunningAvgSamplesPerSec=6.335256820084927, CurrSamplesPerSec=5.699738534613269, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:02:18,535] [INFO] [timer.py:197:stop] 0/6642, RunningAvgSamplesPerSec=6.335251033360833, CurrSamplesPerSec=5.684267116288428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:02:29,824] [INFO] [timer.py:197:stop] 0/6644, RunningAvgSamplesPerSec=6.335255060875743, CurrSamplesPerSec=5.7108925524734895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:02:41,128] [INFO] [timer.py:197:stop] 0/6646, RunningAvgSamplesPerSec=6.335258861561374, CurrSamplesPerSec=5.713440523809875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:02:52,390] [INFO] [timer.py:197:stop] 0/6648, RunningAvgSamplesPerSec=6.335263542215526, CurrSamplesPerSec=5.708266494051981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:03:03,906] [INFO] [timer.py:197:stop] 0/6650, RunningAvgSamplesPerSec=6.335263763525754, CurrSamplesPerSec=5.702244582000596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0005, 'learning_rate': 3.7355555555555555e-06, 'epoch': 14.09} +[2022-12-17 08:03:15,189] [INFO] [timer.py:197:stop] 0/6652, RunningAvgSamplesPerSec=6.335268083035561, CurrSamplesPerSec=5.705293600602794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:03:26,497] [INFO] [timer.py:197:stop] 0/6654, RunningAvgSamplesPerSec=6.335269735440721, CurrSamplesPerSec=5.711725176171957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:03:37,786] [INFO] [timer.py:197:stop] 0/6656, RunningAvgSamplesPerSec=6.335274164895733, CurrSamplesPerSec=5.725324466139221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:03:49,173] [INFO] [timer.py:197:stop] 0/6658, RunningAvgSamplesPerSec=6.335257735532661, CurrSamplesPerSec=5.597545279286038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:04:00,490] [INFO] [logging.py:68:log_dist] [Rank 0] step=3330, skipped=5, lr=[3.724444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:04:00,492] [INFO] [timer.py:197:stop] 0/6660, RunningAvgSamplesPerSec=6.33525684867156, CurrSamplesPerSec=5.705849508753836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:04:11,766] [INFO] [timer.py:197:stop] 0/6662, RunningAvgSamplesPerSec=6.335262246882107, CurrSamplesPerSec=5.711982594968425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:04:23,073] [INFO] [timer.py:197:stop] 0/6664, RunningAvgSamplesPerSec=6.335263028881804, CurrSamplesPerSec=5.69388579921607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:04:34,384] [INFO] [timer.py:197:stop] 0/6666, RunningAvgSamplesPerSec=6.335262645151982, CurrSamplesPerSec=5.6963992731641575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:04:45,654] [INFO] [timer.py:197:stop] 0/6668, RunningAvgSamplesPerSec=6.3352687746241605, CurrSamplesPerSec=5.7199967730182735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:04:56,965] [INFO] [timer.py:197:stop] 0/6670, RunningAvgSamplesPerSec=6.335270488377442, CurrSamplesPerSec=5.689561470325925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:05:08,276] [INFO] [timer.py:197:stop] 0/6672, RunningAvgSamplesPerSec=6.335270114949652, CurrSamplesPerSec=5.708504663321607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:05:19,597] [INFO] [timer.py:197:stop] 0/6674, RunningAvgSamplesPerSec=6.335268466370297, CurrSamplesPerSec=5.690876948369703, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:05:31,093] [INFO] [timer.py:197:stop] 0/6676, RunningAvgSamplesPerSec=6.3352654854506785, CurrSamplesPerSec=5.707486816894073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:05:42,393] [INFO] [timer.py:197:stop] 0/6678, RunningAvgSamplesPerSec=6.335264715430377, CurrSamplesPerSec=5.688169699341959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:05:53,698] [INFO] [logging.py:68:log_dist] [Rank 0] step=3340, skipped=5, lr=[3.7022222222222227e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:05:53,700] [INFO] [timer.py:197:stop] 0/6680, RunningAvgSamplesPerSec=6.335266006192747, CurrSamplesPerSec=5.705141302615219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:06:05,038] [INFO] [timer.py:197:stop] 0/6682, RunningAvgSamplesPerSec=6.335261982342033, CurrSamplesPerSec=5.68633843174893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:06:16,383] [INFO] [timer.py:197:stop] 0/6684, RunningAvgSamplesPerSec=6.335256750990903, CurrSamplesPerSec=5.697760969574792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:06:27,700] [INFO] [timer.py:197:stop] 0/6686, RunningAvgSamplesPerSec=6.3352566792083405, CurrSamplesPerSec=5.706313818730529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:06:39,031] [INFO] [timer.py:197:stop] 0/6688, RunningAvgSamplesPerSec=6.335252849571285, CurrSamplesPerSec=5.701574569442484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:06:50,306] [INFO] [timer.py:197:stop] 0/6690, RunningAvgSamplesPerSec=6.335259835192428, CurrSamplesPerSec=5.716095689794135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:07:01,675] [INFO] [timer.py:197:stop] 0/6692, RunningAvgSamplesPerSec=6.335252075091651, CurrSamplesPerSec=5.65181390420287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:07:12,987] [INFO] [timer.py:197:stop] 0/6694, RunningAvgSamplesPerSec=6.335255091903392, CurrSamplesPerSec=5.731070739015337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:07:24,290] [INFO] [timer.py:197:stop] 0/6696, RunningAvgSamplesPerSec=6.335256305080493, CurrSamplesPerSec=5.6998179271863325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:07:35,587] [INFO] [timer.py:197:stop] 0/6698, RunningAvgSamplesPerSec=6.335258540880882, CurrSamplesPerSec=5.696401449037628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:07:46,914] [INFO] [logging.py:68:log_dist] [Rank 0] step=3350, skipped=5, lr=[3.6800000000000003e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:07:46,916] [INFO] [timer.py:197:stop] 0/6700, RunningAvgSamplesPerSec=6.335257708318872, CurrSamplesPerSec=5.687760881501747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0005, 'learning_rate': 3.6800000000000003e-06, 'epoch': 14.19} +[2022-12-17 08:07:58,231] [INFO] [timer.py:197:stop] 0/6702, RunningAvgSamplesPerSec=6.33525974504948, CurrSamplesPerSec=5.714890436985326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:08:09,564] [INFO] [timer.py:197:stop] 0/6704, RunningAvgSamplesPerSec=6.335257964839845, CurrSamplesPerSec=5.67029342892191, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:08:20,858] [INFO] [timer.py:197:stop] 0/6706, RunningAvgSamplesPerSec=6.33526392682798, CurrSamplesPerSec=5.707362068898067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:08:32,139] [INFO] [timer.py:197:stop] 0/6708, RunningAvgSamplesPerSec=6.33527052873353, CurrSamplesPerSec=5.71907424934495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:08:43,389] [INFO] [timer.py:197:stop] 0/6710, RunningAvgSamplesPerSec=6.335281135421937, CurrSamplesPerSec=5.738554357497209, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:08:54,673] [INFO] [timer.py:197:stop] 0/6712, RunningAvgSamplesPerSec=6.335286597623496, CurrSamplesPerSec=5.707688997892389, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:09:05,934] [INFO] [timer.py:197:stop] 0/6714, RunningAvgSamplesPerSec=6.335290896389518, CurrSamplesPerSec=5.718155921013812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:09:17,219] [INFO] [timer.py:197:stop] 0/6716, RunningAvgSamplesPerSec=6.335293824979662, CurrSamplesPerSec=5.723692042585808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:09:28,508] [INFO] [timer.py:197:stop] 0/6718, RunningAvgSamplesPerSec=6.33529850860076, CurrSamplesPerSec=5.698207513618382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:09:39,774] [INFO] [logging.py:68:log_dist] [Rank 0] step=3360, skipped=5, lr=[3.657777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:09:39,776] [INFO] [timer.py:197:stop] 0/6720, RunningAvgSamplesPerSec=6.335304443900086, CurrSamplesPerSec=5.721201742101746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:09:50,989] [INFO] [timer.py:197:stop] 0/6722, RunningAvgSamplesPerSec=6.335321342992387, CurrSamplesPerSec=5.75129787762289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:10:02,250] [INFO] [timer.py:197:stop] 0/6724, RunningAvgSamplesPerSec=6.335331044349278, CurrSamplesPerSec=5.7417568011259545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:10:13,520] [INFO] [timer.py:197:stop] 0/6726, RunningAvgSamplesPerSec=6.335336258451266, CurrSamplesPerSec=5.7119300884010835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:10:24,824] [INFO] [timer.py:197:stop] 0/6728, RunningAvgSamplesPerSec=6.3353355264456335, CurrSamplesPerSec=5.691331825993934, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:10:36,091] [INFO] [timer.py:197:stop] 0/6730, RunningAvgSamplesPerSec=6.335344092952332, CurrSamplesPerSec=5.740056569296319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:10:47,363] [INFO] [timer.py:197:stop] 0/6732, RunningAvgSamplesPerSec=6.33535210505159, CurrSamplesPerSec=5.723860954990893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:10:58,619] [INFO] [timer.py:197:stop] 0/6734, RunningAvgSamplesPerSec=6.33535971186609, CurrSamplesPerSec=5.7344738159501825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:11:09,901] [INFO] [timer.py:197:stop] 0/6736, RunningAvgSamplesPerSec=6.335368220679748, CurrSamplesPerSec=5.742782485543321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:11:21,153] [INFO] [timer.py:197:stop] 0/6738, RunningAvgSamplesPerSec=6.335375761893769, CurrSamplesPerSec=5.738013891085249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:11:32,462] [INFO] [logging.py:68:log_dist] [Rank 0] step=3370, skipped=5, lr=[3.635555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:11:32,463] [INFO] [timer.py:197:stop] 0/6740, RunningAvgSamplesPerSec=6.335381584313619, CurrSamplesPerSec=5.710524924063667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:11:43,708] [INFO] [timer.py:197:stop] 0/6742, RunningAvgSamplesPerSec=6.335393113987534, CurrSamplesPerSec=5.749184147746339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:11:54,944] [INFO] [timer.py:197:stop] 0/6744, RunningAvgSamplesPerSec=6.335403399337236, CurrSamplesPerSec=5.731368818118538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:12:06,243] [INFO] [timer.py:197:stop] 0/6746, RunningAvgSamplesPerSec=6.335405800521349, CurrSamplesPerSec=5.707700648614486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:12:17,513] [INFO] [timer.py:197:stop] 0/6748, RunningAvgSamplesPerSec=6.33541325872857, CurrSamplesPerSec=5.699587743224427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:12:28,805] [INFO] [timer.py:197:stop] 0/6750, RunningAvgSamplesPerSec=6.335419206168785, CurrSamplesPerSec=5.72087448256261, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0009, 'learning_rate': 3.624444444444445e-06, 'epoch': 14.3} +[2022-12-17 08:12:40,100] [INFO] [timer.py:197:stop] 0/6752, RunningAvgSamplesPerSec=6.335425900995575, CurrSamplesPerSec=5.729924964973949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:12:51,350] [INFO] [timer.py:197:stop] 0/6754, RunningAvgSamplesPerSec=6.335434128041205, CurrSamplesPerSec=5.727344187421743, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:13:02,597] [INFO] [timer.py:197:stop] 0/6756, RunningAvgSamplesPerSec=6.335443808382267, CurrSamplesPerSec=5.728703359756582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:13:13,907] [INFO] [timer.py:197:stop] 0/6758, RunningAvgSamplesPerSec=6.335447032977318, CurrSamplesPerSec=5.70918893392257, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:13:25,194] [INFO] [logging.py:68:log_dist] [Rank 0] step=3380, skipped=5, lr=[3.6133333333333336e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:13:25,196] [INFO] [timer.py:197:stop] 0/6760, RunningAvgSamplesPerSec=6.33545070601345, CurrSamplesPerSec=5.69207354103689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:13:36,570] [INFO] [timer.py:197:stop] 0/6762, RunningAvgSamplesPerSec=6.335457147352796, CurrSamplesPerSec=5.711385146683424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:13:47,814] [INFO] [timer.py:197:stop] 0/6764, RunningAvgSamplesPerSec=6.335465629789517, CurrSamplesPerSec=5.726456184764834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:13:59,082] [INFO] [timer.py:197:stop] 0/6766, RunningAvgSamplesPerSec=6.335473141102003, CurrSamplesPerSec=5.7204536359869085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:14:10,357] [INFO] [timer.py:197:stop] 0/6768, RunningAvgSamplesPerSec=6.3354813559791765, CurrSamplesPerSec=5.714565602211815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:14:21,632] [INFO] [timer.py:197:stop] 0/6770, RunningAvgSamplesPerSec=6.335488909631325, CurrSamplesPerSec=5.728411670041658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:14:32,882] [INFO] [timer.py:197:stop] 0/6772, RunningAvgSamplesPerSec=6.335497528237216, CurrSamplesPerSec=5.7178209714922055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:14:44,133] [INFO] [timer.py:197:stop] 0/6774, RunningAvgSamplesPerSec=6.335505795423574, CurrSamplesPerSec=5.716592103813912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:14:55,421] [INFO] [timer.py:197:stop] 0/6776, RunningAvgSamplesPerSec=6.3355121742745775, CurrSamplesPerSec=5.704353508621425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:15:06,682] [INFO] [timer.py:197:stop] 0/6778, RunningAvgSamplesPerSec=6.335520427485223, CurrSamplesPerSec=5.731632416359113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:15:17,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=3390, skipped=5, lr=[3.5911111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:15:17,931] [INFO] [timer.py:197:stop] 0/6780, RunningAvgSamplesPerSec=6.335527528500903, CurrSamplesPerSec=5.716105183907788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:15:29,226] [INFO] [timer.py:197:stop] 0/6782, RunningAvgSamplesPerSec=6.335529292704897, CurrSamplesPerSec=5.703166045845869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:15:40,494] [INFO] [timer.py:197:stop] 0/6784, RunningAvgSamplesPerSec=6.335539846623111, CurrSamplesPerSec=5.7389356646810805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:15:51,770] [INFO] [timer.py:197:stop] 0/6786, RunningAvgSamplesPerSec=6.3355470731281125, CurrSamplesPerSec=5.725897964153275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:16:03,056] [INFO] [timer.py:197:stop] 0/6788, RunningAvgSamplesPerSec=6.335558698275204, CurrSamplesPerSec=5.750312755131069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:16:14,329] [INFO] [timer.py:197:stop] 0/6790, RunningAvgSamplesPerSec=6.335565162906134, CurrSamplesPerSec=5.72310068363258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:16:25,568] [INFO] [timer.py:197:stop] 0/6792, RunningAvgSamplesPerSec=6.3355781705712575, CurrSamplesPerSec=5.7392797188928295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:16:36,788] [INFO] [timer.py:197:stop] 0/6794, RunningAvgSamplesPerSec=6.3355899210501265, CurrSamplesPerSec=5.747177050776454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:16:48,027] [INFO] [timer.py:197:stop] 0/6796, RunningAvgSamplesPerSec=6.3356043056202145, CurrSamplesPerSec=5.745012248058093, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:16:59,286] [INFO] [timer.py:197:stop] 0/6798, RunningAvgSamplesPerSec=6.335614858480439, CurrSamplesPerSec=5.725945353710456, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:17:10,581] [INFO] [logging.py:68:log_dist] [Rank 0] step=3400, skipped=5, lr=[3.568888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:17:10,583] [INFO] [timer.py:197:stop] 0/6800, RunningAvgSamplesPerSec=6.335619836173475, CurrSamplesPerSec=5.717412507986601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0008, 'learning_rate': 3.568888888888889e-06, 'epoch': 14.41} +[2022-12-17 08:17:21,869] [INFO] [timer.py:197:stop] 0/6802, RunningAvgSamplesPerSec=6.335624512362709, CurrSamplesPerSec=5.709963003305895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:17:33,131] [INFO] [timer.py:197:stop] 0/6804, RunningAvgSamplesPerSec=6.335632256641158, CurrSamplesPerSec=5.7104974692478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:17:44,449] [INFO] [timer.py:197:stop] 0/6806, RunningAvgSamplesPerSec=6.3356370688183095, CurrSamplesPerSec=5.702223989970849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:17:55,758] [INFO] [timer.py:197:stop] 0/6808, RunningAvgSamplesPerSec=6.335634554728273, CurrSamplesPerSec=5.681335995843267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:18:07,044] [INFO] [timer.py:197:stop] 0/6810, RunningAvgSamplesPerSec=6.335642356203787, CurrSamplesPerSec=5.74130463368171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:18:18,373] [INFO] [timer.py:197:stop] 0/6812, RunningAvgSamplesPerSec=6.335643910205863, CurrSamplesPerSec=5.700655798880245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:18:29,676] [INFO] [timer.py:197:stop] 0/6814, RunningAvgSamplesPerSec=6.335647904715997, CurrSamplesPerSec=5.704953851446727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:18:40,928] [INFO] [timer.py:197:stop] 0/6816, RunningAvgSamplesPerSec=6.335657061179278, CurrSamplesPerSec=5.731199217585403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:18:52,279] [INFO] [timer.py:197:stop] 0/6818, RunningAvgSamplesPerSec=6.335654143809877, CurrSamplesPerSec=5.673416040982523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:19:03,532] [INFO] [logging.py:68:log_dist] [Rank 0] step=3410, skipped=5, lr=[3.5466666666666673e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:19:03,533] [INFO] [timer.py:197:stop] 0/6820, RunningAvgSamplesPerSec=6.335662274107223, CurrSamplesPerSec=5.727299707429968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:19:14,845] [INFO] [timer.py:197:stop] 0/6822, RunningAvgSamplesPerSec=6.335671436628885, CurrSamplesPerSec=5.723870230816352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:19:26,054] [INFO] [timer.py:197:stop] 0/6824, RunningAvgSamplesPerSec=6.335688201402547, CurrSamplesPerSec=5.752427315123319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:19:37,330] [INFO] [timer.py:197:stop] 0/6826, RunningAvgSamplesPerSec=6.335696912892801, CurrSamplesPerSec=5.72277906300634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:19:48,614] [INFO] [timer.py:197:stop] 0/6828, RunningAvgSamplesPerSec=6.3357056730619945, CurrSamplesPerSec=5.72484216216728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:19:59,931] [INFO] [timer.py:197:stop] 0/6830, RunningAvgSamplesPerSec=6.335707479480197, CurrSamplesPerSec=5.703681546110575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:20:11,197] [INFO] [timer.py:197:stop] 0/6832, RunningAvgSamplesPerSec=6.335715269638013, CurrSamplesPerSec=5.717828766235083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:20:22,486] [INFO] [timer.py:197:stop] 0/6834, RunningAvgSamplesPerSec=6.335721207825923, CurrSamplesPerSec=5.724875615566773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:20:33,797] [INFO] [timer.py:197:stop] 0/6836, RunningAvgSamplesPerSec=6.3357270916090345, CurrSamplesPerSec=5.717510173438337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:20:45,062] [INFO] [timer.py:197:stop] 0/6838, RunningAvgSamplesPerSec=6.335738854291303, CurrSamplesPerSec=5.7381917455513936, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:20:56,337] [INFO] [logging.py:68:log_dist] [Rank 0] step=3420, skipped=5, lr=[3.524444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:20:56,339] [INFO] [timer.py:197:stop] 0/6840, RunningAvgSamplesPerSec=6.335748624177438, CurrSamplesPerSec=5.712516466537226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:21:07,619] [INFO] [timer.py:197:stop] 0/6842, RunningAvgSamplesPerSec=6.3357580202583454, CurrSamplesPerSec=5.729844731471844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:21:18,896] [INFO] [timer.py:197:stop] 0/6844, RunningAvgSamplesPerSec=6.335764802157587, CurrSamplesPerSec=5.718550359160185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:21:30,178] [INFO] [timer.py:197:stop] 0/6846, RunningAvgSamplesPerSec=6.335770762004364, CurrSamplesPerSec=5.708233720020049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:21:41,441] [INFO] [timer.py:197:stop] 0/6848, RunningAvgSamplesPerSec=6.335778103959621, CurrSamplesPerSec=5.700215407405394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:21:52,731] [INFO] [timer.py:197:stop] 0/6850, RunningAvgSamplesPerSec=6.335784778141418, CurrSamplesPerSec=5.717134630278461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.5133333333333337e-06, 'epoch': 14.51} +[2022-12-17 08:22:04,061] [INFO] [timer.py:197:stop] 0/6852, RunningAvgSamplesPerSec=6.335783966524976, CurrSamplesPerSec=5.688345923740799, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:22:15,354] [INFO] [timer.py:197:stop] 0/6854, RunningAvgSamplesPerSec=6.335788130663457, CurrSamplesPerSec=5.720207398745991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:22:26,618] [INFO] [timer.py:197:stop] 0/6856, RunningAvgSamplesPerSec=6.335797860329845, CurrSamplesPerSec=5.732850129006261, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:22:37,892] [INFO] [timer.py:197:stop] 0/6858, RunningAvgSamplesPerSec=6.335802709178029, CurrSamplesPerSec=5.71466876670745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:22:49,155] [INFO] [logging.py:68:log_dist] [Rank 0] step=3430, skipped=5, lr=[3.5022222222222225e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:22:49,156] [INFO] [timer.py:197:stop] 0/6860, RunningAvgSamplesPerSec=6.335808399079511, CurrSamplesPerSec=5.727941557107226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:23:00,440] [INFO] [timer.py:197:stop] 0/6862, RunningAvgSamplesPerSec=6.335816373501924, CurrSamplesPerSec=5.718228762504417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:23:11,737] [INFO] [timer.py:197:stop] 0/6864, RunningAvgSamplesPerSec=6.335820645429806, CurrSamplesPerSec=5.706740837128679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:23:22,999] [INFO] [timer.py:197:stop] 0/6866, RunningAvgSamplesPerSec=6.335828758770827, CurrSamplesPerSec=5.730471982504473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:23:34,230] [INFO] [timer.py:197:stop] 0/6868, RunningAvgSamplesPerSec=6.335839943703662, CurrSamplesPerSec=5.733005624138695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:23:45,488] [INFO] [timer.py:197:stop] 0/6870, RunningAvgSamplesPerSec=6.335847727715519, CurrSamplesPerSec=5.702430886064505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:23:56,774] [INFO] [timer.py:197:stop] 0/6872, RunningAvgSamplesPerSec=6.335855739519945, CurrSamplesPerSec=5.7183028239827856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:24:08,043] [INFO] [timer.py:197:stop] 0/6874, RunningAvgSamplesPerSec=6.335864068228962, CurrSamplesPerSec=5.727191198700244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:24:19,291] [INFO] [timer.py:197:stop] 0/6876, RunningAvgSamplesPerSec=6.33587683092059, CurrSamplesPerSec=5.749437811807974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:24:30,554] [INFO] [timer.py:197:stop] 0/6878, RunningAvgSamplesPerSec=6.3358862520744275, CurrSamplesPerSec=5.734842329116901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:24:41,826] [INFO] [logging.py:68:log_dist] [Rank 0] step=3440, skipped=5, lr=[3.48e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:24:41,828] [INFO] [timer.py:197:stop] 0/6880, RunningAvgSamplesPerSec=6.335890426523647, CurrSamplesPerSec=5.705601616605988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:24:53,089] [INFO] [timer.py:197:stop] 0/6882, RunningAvgSamplesPerSec=6.335901324756337, CurrSamplesPerSec=5.740607978399688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:25:04,361] [INFO] [timer.py:197:stop] 0/6884, RunningAvgSamplesPerSec=6.335911221101385, CurrSamplesPerSec=5.7418000321618115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:25:15,641] [INFO] [timer.py:197:stop] 0/6886, RunningAvgSamplesPerSec=6.335917392893157, CurrSamplesPerSec=5.711138474432159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:25:27,040] [INFO] [timer.py:197:stop] 0/6888, RunningAvgSamplesPerSec=6.335900279695778, CurrSamplesPerSec=5.609052065020303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:25:38,349] [INFO] [timer.py:197:stop] 0/6890, RunningAvgSamplesPerSec=6.335900315427077, CurrSamplesPerSec=5.696869058630994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:25:49,666] [INFO] [timer.py:197:stop] 0/6892, RunningAvgSamplesPerSec=6.335898075337393, CurrSamplesPerSec=5.699606379930252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:26:00,949] [INFO] [timer.py:197:stop] 0/6894, RunningAvgSamplesPerSec=6.335902164159549, CurrSamplesPerSec=5.718328892130646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:26:12,240] [INFO] [timer.py:197:stop] 0/6896, RunningAvgSamplesPerSec=6.335904938835898, CurrSamplesPerSec=5.701893811029447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:26:23,574] [INFO] [timer.py:197:stop] 0/6898, RunningAvgSamplesPerSec=6.3359007400630984, CurrSamplesPerSec=5.69692273951494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:26:34,862] [INFO] [logging.py:68:log_dist] [Rank 0] step=3450, skipped=5, lr=[3.457777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:26:34,863] [INFO] [timer.py:197:stop] 0/6900, RunningAvgSamplesPerSec=6.335905375702142, CurrSamplesPerSec=5.716917899546148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.457777777777778e-06, 'epoch': 14.62} +[2022-12-17 08:26:46,183] [INFO] [timer.py:197:stop] 0/6902, RunningAvgSamplesPerSec=6.335905797329182, CurrSamplesPerSec=5.692203415362899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:26:57,512] [INFO] [timer.py:197:stop] 0/6904, RunningAvgSamplesPerSec=6.335902977951562, CurrSamplesPerSec=5.677874616513825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:27:08,810] [INFO] [timer.py:197:stop] 0/6906, RunningAvgSamplesPerSec=6.335904392747657, CurrSamplesPerSec=5.695849314014534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:27:20,096] [INFO] [timer.py:197:stop] 0/6908, RunningAvgSamplesPerSec=6.335910373994993, CurrSamplesPerSec=5.736314158738321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:27:31,401] [INFO] [timer.py:197:stop] 0/6910, RunningAvgSamplesPerSec=6.335912398613864, CurrSamplesPerSec=5.71390071854944, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:27:42,692] [INFO] [timer.py:197:stop] 0/6912, RunningAvgSamplesPerSec=6.335915392107247, CurrSamplesPerSec=5.7122050293280004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:27:53,995] [INFO] [timer.py:197:stop] 0/6914, RunningAvgSamplesPerSec=6.335916733915622, CurrSamplesPerSec=5.725665668959716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:28:05,313] [INFO] [timer.py:197:stop] 0/6916, RunningAvgSamplesPerSec=6.335914598370398, CurrSamplesPerSec=5.679297640686226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:28:16,631] [INFO] [timer.py:197:stop] 0/6918, RunningAvgSamplesPerSec=6.335915782775784, CurrSamplesPerSec=5.700484621597277, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:28:27,974] [INFO] [logging.py:68:log_dist] [Rank 0] step=3460, skipped=5, lr=[3.435555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:28:27,976] [INFO] [timer.py:197:stop] 0/6920, RunningAvgSamplesPerSec=6.335913270570877, CurrSamplesPerSec=5.707983921431771, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:28:39,318] [INFO] [timer.py:197:stop] 0/6922, RunningAvgSamplesPerSec=6.33590842048816, CurrSamplesPerSec=5.68215954167032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:28:50,626] [INFO] [timer.py:197:stop] 0/6924, RunningAvgSamplesPerSec=6.335910052087327, CurrSamplesPerSec=5.704671364224143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:29:01,927] [INFO] [timer.py:197:stop] 0/6926, RunningAvgSamplesPerSec=6.335912457522142, CurrSamplesPerSec=5.695688576781109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:29:13,222] [INFO] [timer.py:197:stop] 0/6928, RunningAvgSamplesPerSec=6.3359141317974705, CurrSamplesPerSec=5.6967387295234175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:29:24,527] [INFO] [timer.py:197:stop] 0/6930, RunningAvgSamplesPerSec=6.335916105885563, CurrSamplesPerSec=5.687891523266612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:29:35,825] [INFO] [timer.py:197:stop] 0/6932, RunningAvgSamplesPerSec=6.33591708655648, CurrSamplesPerSec=5.690714320118148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:29:47,140] [INFO] [timer.py:197:stop] 0/6934, RunningAvgSamplesPerSec=6.335917314739031, CurrSamplesPerSec=5.708687248940789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:29:58,455] [INFO] [timer.py:197:stop] 0/6936, RunningAvgSamplesPerSec=6.335917005333467, CurrSamplesPerSec=5.681345615327618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:30:09,741] [INFO] [timer.py:197:stop] 0/6938, RunningAvgSamplesPerSec=6.335926384666303, CurrSamplesPerSec=5.7203936594189075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:30:21,016] [INFO] [logging.py:68:log_dist] [Rank 0] step=3470, skipped=5, lr=[3.4133333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:30:21,017] [INFO] [timer.py:197:stop] 0/6940, RunningAvgSamplesPerSec=6.335933667012311, CurrSamplesPerSec=5.712208919044687, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:30:32,294] [INFO] [timer.py:197:stop] 0/6942, RunningAvgSamplesPerSec=6.335941308188668, CurrSamplesPerSec=5.7301470865111455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:30:43,621] [INFO] [timer.py:197:stop] 0/6944, RunningAvgSamplesPerSec=6.3359411875298095, CurrSamplesPerSec=5.714233019399983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:30:55,097] [INFO] [timer.py:197:stop] 0/6946, RunningAvgSamplesPerSec=6.335946099866804, CurrSamplesPerSec=5.717290004668645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:31:06,462] [INFO] [timer.py:197:stop] 0/6948, RunningAvgSamplesPerSec=6.3359497882734175, CurrSamplesPerSec=5.712707089669525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:31:17,758] [INFO] [timer.py:197:stop] 0/6950, RunningAvgSamplesPerSec=6.335949974405662, CurrSamplesPerSec=5.708783402445716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.4022222222222222e-06, 'epoch': 14.72} +[2022-12-17 08:31:29,055] [INFO] [timer.py:197:stop] 0/6952, RunningAvgSamplesPerSec=6.335951038316049, CurrSamplesPerSec=5.677335192591269, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:31:40,326] [INFO] [timer.py:197:stop] 0/6954, RunningAvgSamplesPerSec=6.335954671614913, CurrSamplesPerSec=5.711804659953133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:31:51,641] [INFO] [timer.py:197:stop] 0/6956, RunningAvgSamplesPerSec=6.335953433679765, CurrSamplesPerSec=5.7017620408218574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:32:02,923] [INFO] [timer.py:197:stop] 0/6958, RunningAvgSamplesPerSec=6.335957884016047, CurrSamplesPerSec=5.715676519597433, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:32:14,226] [INFO] [logging.py:68:log_dist] [Rank 0] step=3480, skipped=5, lr=[3.391111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:32:14,227] [INFO] [timer.py:197:stop] 0/6960, RunningAvgSamplesPerSec=6.3359596235734665, CurrSamplesPerSec=5.70464784509039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:32:25,477] [INFO] [timer.py:197:stop] 0/6962, RunningAvgSamplesPerSec=6.335971526534016, CurrSamplesPerSec=5.738433890664609, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:32:36,769] [INFO] [timer.py:197:stop] 0/6964, RunningAvgSamplesPerSec=6.335978612982931, CurrSamplesPerSec=5.727787558729274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:32:48,040] [INFO] [timer.py:197:stop] 0/6966, RunningAvgSamplesPerSec=6.335986047607533, CurrSamplesPerSec=5.71634717273252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:32:59,310] [INFO] [timer.py:197:stop] 0/6968, RunningAvgSamplesPerSec=6.335992538080176, CurrSamplesPerSec=5.719552169637749, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:33:10,601] [INFO] [timer.py:197:stop] 0/6970, RunningAvgSamplesPerSec=6.335995701230729, CurrSamplesPerSec=5.7108582903242375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:33:21,893] [INFO] [timer.py:197:stop] 0/6972, RunningAvgSamplesPerSec=6.335996009422708, CurrSamplesPerSec=5.704970340835455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:33:33,190] [INFO] [timer.py:197:stop] 0/6974, RunningAvgSamplesPerSec=6.335998452897874, CurrSamplesPerSec=5.711568159668565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:33:44,470] [INFO] [timer.py:197:stop] 0/6976, RunningAvgSamplesPerSec=6.336001605932553, CurrSamplesPerSec=5.7131795689290055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:33:55,763] [INFO] [timer.py:197:stop] 0/6978, RunningAvgSamplesPerSec=6.336006264641368, CurrSamplesPerSec=5.7205270236611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:34:07,028] [INFO] [logging.py:68:log_dist] [Rank 0] step=3490, skipped=5, lr=[3.3688888888888895e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:34:07,030] [INFO] [timer.py:197:stop] 0/6980, RunningAvgSamplesPerSec=6.3360142860991475, CurrSamplesPerSec=5.741111360582159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:34:18,336] [INFO] [timer.py:197:stop] 0/6982, RunningAvgSamplesPerSec=6.336015650217596, CurrSamplesPerSec=5.715640252824482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:34:29,634] [INFO] [timer.py:197:stop] 0/6984, RunningAvgSamplesPerSec=6.336015398383265, CurrSamplesPerSec=5.694377154970248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:34:40,938] [INFO] [timer.py:197:stop] 0/6986, RunningAvgSamplesPerSec=6.336013926721686, CurrSamplesPerSec=5.701118779608611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:34:52,254] [INFO] [timer.py:197:stop] 0/6988, RunningAvgSamplesPerSec=6.336016999623785, CurrSamplesPerSec=5.708849691975506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:35:03,578] [INFO] [timer.py:197:stop] 0/6990, RunningAvgSamplesPerSec=6.336015230397008, CurrSamplesPerSec=5.689597889276906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:35:14,843] [INFO] [timer.py:197:stop] 0/6992, RunningAvgSamplesPerSec=6.336022713412811, CurrSamplesPerSec=5.721351728246623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:35:26,128] [INFO] [timer.py:197:stop] 0/6994, RunningAvgSamplesPerSec=6.3360238162424, CurrSamplesPerSec=5.69377396361336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:35:37,338] [INFO] [timer.py:197:stop] 0/6996, RunningAvgSamplesPerSec=6.336030134866878, CurrSamplesPerSec=5.723018444873249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:35:48,651] [INFO] [timer.py:197:stop] 0/6998, RunningAvgSamplesPerSec=6.336030827930519, CurrSamplesPerSec=5.707811332846688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:35:59,959] [INFO] [logging.py:68:log_dist] [Rank 0] step=3500, skipped=5, lr=[3.346666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:35:59,961] [INFO] [timer.py:197:stop] 0/7000, RunningAvgSamplesPerSec=6.3360310804451325, CurrSamplesPerSec=5.698258800513624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 3.346666666666667e-06, 'epoch': 14.83} +[2022-12-17 08:36:11,249] [INFO] [timer.py:197:stop] 0/7002, RunningAvgSamplesPerSec=6.336033006654374, CurrSamplesPerSec=5.708592798272643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:36:22,512] [INFO] [timer.py:197:stop] 0/7004, RunningAvgSamplesPerSec=6.336039423997487, CurrSamplesPerSec=5.725660295371774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:36:33,824] [INFO] [timer.py:197:stop] 0/7006, RunningAvgSamplesPerSec=6.336040422932118, CurrSamplesPerSec=5.715054693343505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:36:45,159] [INFO] [timer.py:197:stop] 0/7008, RunningAvgSamplesPerSec=6.3360402778094, CurrSamplesPerSec=5.694589763880119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:36:56,492] [INFO] [timer.py:197:stop] 0/7010, RunningAvgSamplesPerSec=6.336039235706016, CurrSamplesPerSec=5.71252570563038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:37:07,829] [INFO] [timer.py:197:stop] 0/7012, RunningAvgSamplesPerSec=6.3360369664535945, CurrSamplesPerSec=5.674658803355623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:37:19,106] [INFO] [timer.py:197:stop] 0/7014, RunningAvgSamplesPerSec=6.336042286637709, CurrSamplesPerSec=5.713445631272786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:37:30,452] [INFO] [timer.py:197:stop] 0/7016, RunningAvgSamplesPerSec=6.336035252063371, CurrSamplesPerSec=5.665982842191559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:37:41,744] [INFO] [timer.py:197:stop] 0/7018, RunningAvgSamplesPerSec=6.336039430102445, CurrSamplesPerSec=5.712375695630346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:37:53,057] [INFO] [logging.py:68:log_dist] [Rank 0] step=3510, skipped=5, lr=[3.3244444444444447e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:37:53,058] [INFO] [timer.py:197:stop] 0/7020, RunningAvgSamplesPerSec=6.336039328173379, CurrSamplesPerSec=5.693599817998553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:38:04,340] [INFO] [timer.py:197:stop] 0/7022, RunningAvgSamplesPerSec=6.336045469751685, CurrSamplesPerSec=5.728534650432044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:38:15,651] [INFO] [timer.py:197:stop] 0/7024, RunningAvgSamplesPerSec=6.336046339089011, CurrSamplesPerSec=5.6930873458324065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:38:27,001] [INFO] [timer.py:197:stop] 0/7026, RunningAvgSamplesPerSec=6.336042450409582, CurrSamplesPerSec=5.693363132300313, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:38:38,326] [INFO] [timer.py:197:stop] 0/7028, RunningAvgSamplesPerSec=6.3360397271690445, CurrSamplesPerSec=5.682382786805048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:38:49,613] [INFO] [timer.py:197:stop] 0/7030, RunningAvgSamplesPerSec=6.3360431729577815, CurrSamplesPerSec=5.709435194920618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:39:00,942] [INFO] [timer.py:197:stop] 0/7032, RunningAvgSamplesPerSec=6.336039455924966, CurrSamplesPerSec=5.684558179697593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:39:12,245] [INFO] [timer.py:197:stop] 0/7034, RunningAvgSamplesPerSec=6.336040089069696, CurrSamplesPerSec=5.69708789920933, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:39:23,556] [INFO] [timer.py:197:stop] 0/7036, RunningAvgSamplesPerSec=6.336039921529636, CurrSamplesPerSec=5.709073339439505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:39:34,893] [INFO] [timer.py:197:stop] 0/7038, RunningAvgSamplesPerSec=6.336036874526808, CurrSamplesPerSec=5.694211186094316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:39:46,334] [INFO] [logging.py:68:log_dist] [Rank 0] step=3520, skipped=5, lr=[3.3022222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:39:46,336] [INFO] [timer.py:197:stop] 0/7040, RunningAvgSamplesPerSec=6.336048338439192, CurrSamplesPerSec=5.736273216710899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:39:57,602] [INFO] [timer.py:197:stop] 0/7042, RunningAvgSamplesPerSec=6.336055473638131, CurrSamplesPerSec=5.747396574121967, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:40:08,902] [INFO] [timer.py:197:stop] 0/7044, RunningAvgSamplesPerSec=6.33605758570761, CurrSamplesPerSec=5.697080402734628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:40:20,185] [INFO] [timer.py:197:stop] 0/7046, RunningAvgSamplesPerSec=6.336061026686192, CurrSamplesPerSec=5.710510832178299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:40:31,484] [INFO] [timer.py:197:stop] 0/7048, RunningAvgSamplesPerSec=6.3360633201784, CurrSamplesPerSec=5.714192635242332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:40:42,778] [INFO] [timer.py:197:stop] 0/7050, RunningAvgSamplesPerSec=6.336067257937258, CurrSamplesPerSec=5.706152732937158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 3.2911111111111116e-06, 'epoch': 14.94} +[2022-12-17 08:40:54,074] [INFO] [timer.py:197:stop] 0/7052, RunningAvgSamplesPerSec=6.336070154814537, CurrSamplesPerSec=5.722531893571896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:41:05,367] [INFO] [timer.py:197:stop] 0/7054, RunningAvgSamplesPerSec=6.336073540324857, CurrSamplesPerSec=5.701336978082232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:41:16,666] [INFO] [timer.py:197:stop] 0/7056, RunningAvgSamplesPerSec=6.336075504694328, CurrSamplesPerSec=5.718039719534393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:41:27,914] [INFO] [timer.py:197:stop] 0/7058, RunningAvgSamplesPerSec=6.336083962326727, CurrSamplesPerSec=5.726415871971864, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:41:39,192] [INFO] [logging.py:68:log_dist] [Rank 0] step=3530, skipped=5, lr=[3.2800000000000004e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:41:39,194] [INFO] [timer.py:197:stop] 0/7060, RunningAvgSamplesPerSec=6.3360858012627155, CurrSamplesPerSec=5.694563428505969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:41:50,461] [INFO] [timer.py:197:stop] 0/7062, RunningAvgSamplesPerSec=6.3360878851830895, CurrSamplesPerSec=5.695557334911018, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:42:01,756] [INFO] [timer.py:197:stop] 0/7064, RunningAvgSamplesPerSec=6.336091603295266, CurrSamplesPerSec=5.708733382793981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:42:13,057] [INFO] [timer.py:197:stop] 0/7066, RunningAvgSamplesPerSec=6.336093431862512, CurrSamplesPerSec=5.712168563491688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:42:24,335] [INFO] [timer.py:197:stop] 0/7068, RunningAvgSamplesPerSec=6.3360968188326225, CurrSamplesPerSec=5.7172568834007444, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:42:35,613] [INFO] [timer.py:197:stop] 0/7070, RunningAvgSamplesPerSec=6.3361020476538545, CurrSamplesPerSec=5.709504171237318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:42:46,948] [INFO] [timer.py:197:stop] 0/7072, RunningAvgSamplesPerSec=6.3360998344648705, CurrSamplesPerSec=5.685475140006454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:42:58,252] [INFO] [timer.py:197:stop] 0/7074, RunningAvgSamplesPerSec=6.3360977220499555, CurrSamplesPerSec=5.704194472889636, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:43:09,577] [INFO] [timer.py:197:stop] 0/7076, RunningAvgSamplesPerSec=6.336097357975871, CurrSamplesPerSec=5.691573169905834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:43:20,894] [INFO] [timer.py:197:stop] 0/7078, RunningAvgSamplesPerSec=6.336095189587284, CurrSamplesPerSec=5.695575461881137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:43:29,363] [INFO] [logging.py:68:log_dist] [Rank 0] step=3540, skipped=5, lr=[3.257777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:43:29,365] [INFO] [timer.py:197:stop] 0/7080, RunningAvgSamplesPerSec=6.336537019491826, CurrSamplesPerSec=10.248503148784106, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:43:40,680] [INFO] [timer.py:197:stop] 0/7082, RunningAvgSamplesPerSec=6.336535244705349, CurrSamplesPerSec=5.7099207361778825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:43:51,992] [INFO] [timer.py:197:stop] 0/7084, RunningAvgSamplesPerSec=6.33653543670802, CurrSamplesPerSec=5.7024248291739665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:44:03,287] [INFO] [timer.py:197:stop] 0/7086, RunningAvgSamplesPerSec=6.336538873598614, CurrSamplesPerSec=5.714536892000577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:44:14,627] [INFO] [timer.py:197:stop] 0/7088, RunningAvgSamplesPerSec=6.336534355904899, CurrSamplesPerSec=5.702760884907398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:44:25,917] [INFO] [timer.py:197:stop] 0/7090, RunningAvgSamplesPerSec=6.336532799418478, CurrSamplesPerSec=5.703959329597068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:44:37,187] [INFO] [timer.py:197:stop] 0/7092, RunningAvgSamplesPerSec=6.336539300908651, CurrSamplesPerSec=5.730101339808488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:44:48,486] [INFO] [timer.py:197:stop] 0/7094, RunningAvgSamplesPerSec=6.336541123049372, CurrSamplesPerSec=5.716769850245525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:44:59,768] [INFO] [timer.py:197:stop] 0/7096, RunningAvgSamplesPerSec=6.336539637361981, CurrSamplesPerSec=5.7223798937525965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:45:11,052] [INFO] [timer.py:197:stop] 0/7098, RunningAvgSamplesPerSec=6.33654313167868, CurrSamplesPerSec=5.709941140920177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:45:22,316] [INFO] [logging.py:68:log_dist] [Rank 0] step=3550, skipped=5, lr=[3.2355555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:45:22,318] [INFO] [timer.py:197:stop] 0/7100, RunningAvgSamplesPerSec=6.336550468912335, CurrSamplesPerSec=5.725339119683709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.2355555555555556e-06, 'epoch': 15.04} +[2022-12-17 08:45:33,597] [INFO] [timer.py:197:stop] 0/7102, RunningAvgSamplesPerSec=6.336555961465411, CurrSamplesPerSec=5.706621702272997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:45:44,865] [INFO] [timer.py:197:stop] 0/7104, RunningAvgSamplesPerSec=6.336565479135939, CurrSamplesPerSec=5.7179184073056994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:45:56,203] [INFO] [timer.py:197:stop] 0/7106, RunningAvgSamplesPerSec=6.33656338842234, CurrSamplesPerSec=5.695559026756681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:46:07,461] [INFO] [timer.py:197:stop] 0/7108, RunningAvgSamplesPerSec=6.33657199020889, CurrSamplesPerSec=5.729104145368554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:46:18,727] [INFO] [timer.py:197:stop] 0/7110, RunningAvgSamplesPerSec=6.3365792546897355, CurrSamplesPerSec=5.70705580414679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:46:29,992] [INFO] [timer.py:197:stop] 0/7112, RunningAvgSamplesPerSec=6.336586540261577, CurrSamplesPerSec=5.726482327303658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:46:41,286] [INFO] [timer.py:197:stop] 0/7114, RunningAvgSamplesPerSec=6.336590431273355, CurrSamplesPerSec=5.718244354235172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:46:52,577] [INFO] [timer.py:197:stop] 0/7116, RunningAvgSamplesPerSec=6.336594859000273, CurrSamplesPerSec=5.7176119828726035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:47:03,929] [INFO] [timer.py:197:stop] 0/7118, RunningAvgSamplesPerSec=6.336587273287749, CurrSamplesPerSec=5.647525654346897, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:47:15,219] [INFO] [logging.py:68:log_dist] [Rank 0] step=3560, skipped=5, lr=[3.213333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:47:15,220] [INFO] [timer.py:197:stop] 0/7120, RunningAvgSamplesPerSec=6.336591838649718, CurrSamplesPerSec=5.72518770334102, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:47:26,510] [INFO] [timer.py:197:stop] 0/7122, RunningAvgSamplesPerSec=6.336596657755495, CurrSamplesPerSec=5.721397091458818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:47:37,763] [INFO] [timer.py:197:stop] 0/7124, RunningAvgSamplesPerSec=6.336604749335138, CurrSamplesPerSec=5.7333793366997465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:47:49,080] [INFO] [timer.py:197:stop] 0/7126, RunningAvgSamplesPerSec=6.336609154333121, CurrSamplesPerSec=5.721207351198136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:48:00,371] [INFO] [timer.py:197:stop] 0/7128, RunningAvgSamplesPerSec=6.336612749558974, CurrSamplesPerSec=5.717910855911447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:48:11,629] [INFO] [timer.py:197:stop] 0/7130, RunningAvgSamplesPerSec=6.336622000394005, CurrSamplesPerSec=5.724012789389099, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:48:22,953] [INFO] [timer.py:197:stop] 0/7132, RunningAvgSamplesPerSec=6.336620699917402, CurrSamplesPerSec=5.681224652682598, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:48:34,213] [INFO] [timer.py:197:stop] 0/7134, RunningAvgSamplesPerSec=6.336627883073419, CurrSamplesPerSec=5.721560258636635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:48:45,473] [INFO] [timer.py:197:stop] 0/7136, RunningAvgSamplesPerSec=6.336637623973165, CurrSamplesPerSec=5.725201135112214, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:48:56,737] [INFO] [timer.py:197:stop] 0/7138, RunningAvgSamplesPerSec=6.3366465793779305, CurrSamplesPerSec=5.730776116579845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:49:07,989] [INFO] [logging.py:68:log_dist] [Rank 0] step=3570, skipped=5, lr=[3.1911111111111117e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:49:07,989] [INFO] [timer.py:197:stop] 0/7140, RunningAvgSamplesPerSec=6.3366573634205965, CurrSamplesPerSec=5.756063900681472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:49:19,275] [INFO] [timer.py:197:stop] 0/7142, RunningAvgSamplesPerSec=6.336667028427543, CurrSamplesPerSec=5.72736545007946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:49:30,524] [INFO] [timer.py:197:stop] 0/7144, RunningAvgSamplesPerSec=6.336676511407681, CurrSamplesPerSec=5.725585554694367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:49:41,775] [INFO] [timer.py:197:stop] 0/7146, RunningAvgSamplesPerSec=6.336686159963732, CurrSamplesPerSec=5.7252008908976295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:49:53,178] [INFO] [timer.py:197:stop] 0/7148, RunningAvgSamplesPerSec=6.336691032030077, CurrSamplesPerSec=5.711277483169063, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:50:04,418] [INFO] [timer.py:197:stop] 0/7150, RunningAvgSamplesPerSec=6.3367025598086615, CurrSamplesPerSec=5.74369571437387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0006, 'learning_rate': 3.1800000000000005e-06, 'epoch': 15.15} +[2022-12-17 08:50:15,683] [INFO] [timer.py:197:stop] 0/7152, RunningAvgSamplesPerSec=6.336711577607592, CurrSamplesPerSec=5.735962370560981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:50:26,909] [INFO] [timer.py:197:stop] 0/7154, RunningAvgSamplesPerSec=6.336721119542908, CurrSamplesPerSec=5.736889370195829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:50:38,183] [INFO] [timer.py:197:stop] 0/7156, RunningAvgSamplesPerSec=6.336727910475315, CurrSamplesPerSec=5.719653320583004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:50:49,437] [INFO] [timer.py:197:stop] 0/7158, RunningAvgSamplesPerSec=6.336736546883692, CurrSamplesPerSec=5.747683062543447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:51:00,702] [INFO] [logging.py:68:log_dist] [Rank 0] step=3580, skipped=5, lr=[3.1688888888888893e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:51:00,703] [INFO] [timer.py:197:stop] 0/7160, RunningAvgSamplesPerSec=6.3367400491662975, CurrSamplesPerSec=5.724335525268068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:51:11,984] [INFO] [timer.py:197:stop] 0/7162, RunningAvgSamplesPerSec=6.336746149962355, CurrSamplesPerSec=5.696210945525485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:51:23,262] [INFO] [timer.py:197:stop] 0/7164, RunningAvgSamplesPerSec=6.336751783981821, CurrSamplesPerSec=5.703505097354939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:51:34,557] [INFO] [timer.py:197:stop] 0/7166, RunningAvgSamplesPerSec=6.336751668168491, CurrSamplesPerSec=5.679854743571504, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:51:45,838] [INFO] [timer.py:197:stop] 0/7168, RunningAvgSamplesPerSec=6.3367593201336785, CurrSamplesPerSec=5.742135831748037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:51:57,139] [INFO] [timer.py:197:stop] 0/7170, RunningAvgSamplesPerSec=6.336761339660549, CurrSamplesPerSec=5.705994082024692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:52:08,614] [INFO] [timer.py:197:stop] 0/7172, RunningAvgSamplesPerSec=6.336769390384303, CurrSamplesPerSec=5.7323410945044655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:52:19,853] [INFO] [timer.py:197:stop] 0/7174, RunningAvgSamplesPerSec=6.336781230655281, CurrSamplesPerSec=5.7495683466982435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:52:31,087] [INFO] [timer.py:197:stop] 0/7176, RunningAvgSamplesPerSec=6.336794208617129, CurrSamplesPerSec=5.748378728034682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:52:42,362] [INFO] [timer.py:197:stop] 0/7178, RunningAvgSamplesPerSec=6.336800609932669, CurrSamplesPerSec=5.710724647572078, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:52:53,636] [INFO] [logging.py:68:log_dist] [Rank 0] step=3590, skipped=5, lr=[3.146666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:52:53,638] [INFO] [timer.py:197:stop] 0/7180, RunningAvgSamplesPerSec=6.3368073192102194, CurrSamplesPerSec=5.725770211677484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:53:04,925] [INFO] [timer.py:197:stop] 0/7182, RunningAvgSamplesPerSec=6.336812826879142, CurrSamplesPerSec=5.709809970121474, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:53:16,221] [INFO] [timer.py:197:stop] 0/7184, RunningAvgSamplesPerSec=6.336816386862075, CurrSamplesPerSec=5.710592954833432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:53:27,496] [INFO] [timer.py:197:stop] 0/7186, RunningAvgSamplesPerSec=6.336823078431179, CurrSamplesPerSec=5.733743545157348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:53:38,771] [INFO] [timer.py:197:stop] 0/7188, RunningAvgSamplesPerSec=6.336827209844939, CurrSamplesPerSec=5.720442420760707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:53:50,057] [INFO] [timer.py:197:stop] 0/7190, RunningAvgSamplesPerSec=6.336832873580546, CurrSamplesPerSec=5.706346570719739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:54:01,335] [INFO] [timer.py:197:stop] 0/7192, RunningAvgSamplesPerSec=6.336842179287273, CurrSamplesPerSec=5.726127835258792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:54:12,593] [INFO] [timer.py:197:stop] 0/7194, RunningAvgSamplesPerSec=6.336850813089948, CurrSamplesPerSec=5.735628518137128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:54:23,825] [INFO] [timer.py:197:stop] 0/7196, RunningAvgSamplesPerSec=6.336863840754564, CurrSamplesPerSec=5.7346649272389785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:54:35,047] [INFO] [timer.py:197:stop] 0/7198, RunningAvgSamplesPerSec=6.336880314132211, CurrSamplesPerSec=5.759368218107903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:54:46,281] [INFO] [logging.py:68:log_dist] [Rank 0] step=3600, skipped=5, lr=[3.124444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:54:46,282] [INFO] [timer.py:197:stop] 0/7200, RunningAvgSamplesPerSec=6.336892118427297, CurrSamplesPerSec=5.752183248066266, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 3.124444444444445e-06, 'epoch': 15.25} +[2022-12-17 08:54:57,558] [INFO] [timer.py:197:stop] 0/7202, RunningAvgSamplesPerSec=6.336900840680547, CurrSamplesPerSec=5.720915448919804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:55:08,863] [INFO] [timer.py:197:stop] 0/7204, RunningAvgSamplesPerSec=6.336903865225444, CurrSamplesPerSec=5.696353096685793, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:55:20,130] [INFO] [timer.py:197:stop] 0/7206, RunningAvgSamplesPerSec=6.336912767375875, CurrSamplesPerSec=5.728348103719084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:55:31,375] [INFO] [timer.py:197:stop] 0/7208, RunningAvgSamplesPerSec=6.336919776426203, CurrSamplesPerSec=5.717917920118372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:55:42,843] [INFO] [timer.py:197:stop] 0/7210, RunningAvgSamplesPerSec=6.336926136940396, CurrSamplesPerSec=5.734597791822031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:55:54,116] [INFO] [timer.py:197:stop] 0/7212, RunningAvgSamplesPerSec=6.336932166862068, CurrSamplesPerSec=5.730988026336347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:56:05,383] [INFO] [timer.py:197:stop] 0/7214, RunningAvgSamplesPerSec=6.336941030459985, CurrSamplesPerSec=5.731765081284228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:56:16,674] [INFO] [timer.py:197:stop] 0/7216, RunningAvgSamplesPerSec=6.336945032001339, CurrSamplesPerSec=5.706284706166854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:56:27,964] [INFO] [timer.py:197:stop] 0/7218, RunningAvgSamplesPerSec=6.336949196971727, CurrSamplesPerSec=5.70364640091355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:56:39,222] [INFO] [logging.py:68:log_dist] [Rank 0] step=3610, skipped=5, lr=[3.1022222222222225e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:56:39,224] [INFO] [timer.py:197:stop] 0/7220, RunningAvgSamplesPerSec=6.336958361609847, CurrSamplesPerSec=5.727809557990573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:56:50,500] [INFO] [timer.py:197:stop] 0/7222, RunningAvgSamplesPerSec=6.336965881887145, CurrSamplesPerSec=5.725152781030868, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:57:01,803] [INFO] [timer.py:197:stop] 0/7224, RunningAvgSamplesPerSec=6.336965178997902, CurrSamplesPerSec=5.697206152662326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:57:13,080] [INFO] [timer.py:197:stop] 0/7226, RunningAvgSamplesPerSec=6.33696923817337, CurrSamplesPerSec=5.711739030983794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:57:24,359] [INFO] [timer.py:197:stop] 0/7228, RunningAvgSamplesPerSec=6.33697472615728, CurrSamplesPerSec=5.707027654678077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:57:35,648] [INFO] [timer.py:197:stop] 0/7230, RunningAvgSamplesPerSec=6.336980410963525, CurrSamplesPerSec=5.710876271826484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:57:46,953] [INFO] [timer.py:197:stop] 0/7232, RunningAvgSamplesPerSec=6.33698384390746, CurrSamplesPerSec=5.7000343315881015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:57:58,269] [INFO] [timer.py:197:stop] 0/7234, RunningAvgSamplesPerSec=6.3369858943607245, CurrSamplesPerSec=5.696722287684846, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:58:09,567] [INFO] [timer.py:197:stop] 0/7236, RunningAvgSamplesPerSec=6.336990611755504, CurrSamplesPerSec=5.717968344447178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:58:20,912] [INFO] [timer.py:197:stop] 0/7238, RunningAvgSamplesPerSec=6.336989861540987, CurrSamplesPerSec=5.703959572002851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:58:32,520] [INFO] [logging.py:68:log_dist] [Rank 0] step=3620, skipped=5, lr=[3.08e-06], mom=[[0.9, 0.999]] +[2022-12-17 08:58:32,522] [INFO] [timer.py:197:stop] 0/7240, RunningAvgSamplesPerSec=6.336985721996713, CurrSamplesPerSec=5.6804374391771955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:58:43,801] [INFO] [timer.py:197:stop] 0/7242, RunningAvgSamplesPerSec=6.336987946622146, CurrSamplesPerSec=5.715724470332622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:58:55,080] [INFO] [timer.py:197:stop] 0/7244, RunningAvgSamplesPerSec=6.336995401497639, CurrSamplesPerSec=5.731321094009947, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:59:06,399] [INFO] [timer.py:197:stop] 0/7246, RunningAvgSamplesPerSec=6.336996297692054, CurrSamplesPerSec=5.692427209007267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:59:17,689] [INFO] [timer.py:197:stop] 0/7248, RunningAvgSamplesPerSec=6.3370017557976315, CurrSamplesPerSec=5.719900241410128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:59:29,023] [INFO] [timer.py:197:stop] 0/7250, RunningAvgSamplesPerSec=6.336998937313612, CurrSamplesPerSec=5.69223359147299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 3.068888888888889e-06, 'epoch': 15.36} +[2022-12-17 08:59:40,346] [INFO] [timer.py:197:stop] 0/7252, RunningAvgSamplesPerSec=6.336997740259851, CurrSamplesPerSec=5.705874493208206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 08:59:51,644] [INFO] [timer.py:197:stop] 0/7254, RunningAvgSamplesPerSec=6.337000250700455, CurrSamplesPerSec=5.706744719407257, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:00:02,933] [INFO] [timer.py:197:stop] 0/7256, RunningAvgSamplesPerSec=6.337003726190621, CurrSamplesPerSec=5.710104869965497, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:00:14,204] [INFO] [timer.py:197:stop] 0/7258, RunningAvgSamplesPerSec=6.33701078437247, CurrSamplesPerSec=5.728722431870606, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:00:25,504] [INFO] [logging.py:68:log_dist] [Rank 0] step=3630, skipped=5, lr=[3.0577777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:00:25,506] [INFO] [timer.py:197:stop] 0/7260, RunningAvgSamplesPerSec=6.337015045017911, CurrSamplesPerSec=5.700771779147463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:00:36,775] [INFO] [timer.py:197:stop] 0/7262, RunningAvgSamplesPerSec=6.337021794010251, CurrSamplesPerSec=5.731328925604024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:00:48,050] [INFO] [timer.py:197:stop] 0/7264, RunningAvgSamplesPerSec=6.337026725477906, CurrSamplesPerSec=5.727018179771592, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:00:59,358] [INFO] [timer.py:197:stop] 0/7266, RunningAvgSamplesPerSec=6.337026294876397, CurrSamplesPerSec=5.686934747484086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:01:10,668] [INFO] [timer.py:197:stop] 0/7268, RunningAvgSamplesPerSec=6.3370274181598685, CurrSamplesPerSec=5.7002589835173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:01:21,952] [INFO] [timer.py:197:stop] 0/7270, RunningAvgSamplesPerSec=6.3370293113435805, CurrSamplesPerSec=5.722975740218383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:01:33,275] [INFO] [timer.py:197:stop] 0/7272, RunningAvgSamplesPerSec=6.337027527195671, CurrSamplesPerSec=5.686863664902051, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:01:44,567] [INFO] [timer.py:197:stop] 0/7274, RunningAvgSamplesPerSec=6.337031035737247, CurrSamplesPerSec=5.712275774378226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:01:55,858] [INFO] [timer.py:197:stop] 0/7276, RunningAvgSamplesPerSec=6.337034122181147, CurrSamplesPerSec=5.715932347290631, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:02:07,148] [INFO] [timer.py:197:stop] 0/7278, RunningAvgSamplesPerSec=6.337035184613046, CurrSamplesPerSec=5.7143690160321805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:02:18,405] [INFO] [logging.py:68:log_dist] [Rank 0] step=3640, skipped=5, lr=[3.0355555555555562e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:02:18,405] [INFO] [timer.py:197:stop] 0/7280, RunningAvgSamplesPerSec=6.337041606835495, CurrSamplesPerSec=5.735732689648618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:02:29,718] [INFO] [timer.py:197:stop] 0/7282, RunningAvgSamplesPerSec=6.337040061689705, CurrSamplesPerSec=5.697223322794721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:02:40,997] [INFO] [timer.py:197:stop] 0/7284, RunningAvgSamplesPerSec=6.337041604685777, CurrSamplesPerSec=5.703056753343804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:02:52,305] [INFO] [timer.py:197:stop] 0/7286, RunningAvgSamplesPerSec=6.337040531002212, CurrSamplesPerSec=5.691550965411008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:03:03,804] [INFO] [timer.py:197:stop] 0/7288, RunningAvgSamplesPerSec=6.3370375606154, CurrSamplesPerSec=5.692482013369373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:03:15,085] [INFO] [timer.py:197:stop] 0/7290, RunningAvgSamplesPerSec=6.3370364233566185, CurrSamplesPerSec=5.685198672295993, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:03:26,380] [INFO] [timer.py:197:stop] 0/7292, RunningAvgSamplesPerSec=6.337039201860143, CurrSamplesPerSec=5.705081161700345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:03:37,699] [INFO] [timer.py:197:stop] 0/7294, RunningAvgSamplesPerSec=6.337037053966658, CurrSamplesPerSec=5.6884517600571955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:03:48,993] [INFO] [timer.py:197:stop] 0/7296, RunningAvgSamplesPerSec=6.3370387233533405, CurrSamplesPerSec=5.710163659243045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:04:00,267] [INFO] [timer.py:197:stop] 0/7298, RunningAvgSamplesPerSec=6.337044139345244, CurrSamplesPerSec=5.708209685969152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:04:11,543] [INFO] [logging.py:68:log_dist] [Rank 0] step=3650, skipped=5, lr=[3.013333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:04:11,544] [INFO] [timer.py:197:stop] 0/7300, RunningAvgSamplesPerSec=6.337044618092546, CurrSamplesPerSec=5.696785395846913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0005, 'learning_rate': 3.013333333333334e-06, 'epoch': 15.47} +[2022-12-17 09:04:22,940] [INFO] [timer.py:197:stop] 0/7302, RunningAvgSamplesPerSec=6.337029950830808, CurrSamplesPerSec=5.638348476166631, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:04:34,249] [INFO] [timer.py:197:stop] 0/7304, RunningAvgSamplesPerSec=6.33703051293742, CurrSamplesPerSec=5.69282196884546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:04:45,585] [INFO] [timer.py:197:stop] 0/7306, RunningAvgSamplesPerSec=6.337026361968943, CurrSamplesPerSec=5.6815235816639715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:04:56,900] [INFO] [timer.py:197:stop] 0/7308, RunningAvgSamplesPerSec=6.33702460748814, CurrSamplesPerSec=5.684987968011892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:05:08,312] [INFO] [timer.py:197:stop] 0/7310, RunningAvgSamplesPerSec=6.337026326779322, CurrSamplesPerSec=5.708179825763601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:05:19,627] [INFO] [timer.py:197:stop] 0/7312, RunningAvgSamplesPerSec=6.337026380581206, CurrSamplesPerSec=5.70632522123226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:05:30,935] [INFO] [timer.py:197:stop] 0/7314, RunningAvgSamplesPerSec=6.337026956602852, CurrSamplesPerSec=5.699977929305063, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:05:42,282] [INFO] [timer.py:197:stop] 0/7316, RunningAvgSamplesPerSec=6.337026503478041, CurrSamplesPerSec=5.697505798420344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:05:53,591] [INFO] [timer.py:197:stop] 0/7318, RunningAvgSamplesPerSec=6.3370269736787606, CurrSamplesPerSec=5.71353829682849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:06:04,920] [INFO] [logging.py:68:log_dist] [Rank 0] step=3660, skipped=5, lr=[2.9911111111111115e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:06:04,921] [INFO] [timer.py:197:stop] 0/7320, RunningAvgSamplesPerSec=6.337023615887206, CurrSamplesPerSec=5.6932969604312555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:06:16,231] [INFO] [timer.py:197:stop] 0/7322, RunningAvgSamplesPerSec=6.3370237639173626, CurrSamplesPerSec=5.698623641485544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:06:27,522] [INFO] [timer.py:197:stop] 0/7324, RunningAvgSamplesPerSec=6.337026244413775, CurrSamplesPerSec=5.7142408043634685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:06:38,895] [INFO] [timer.py:197:stop] 0/7326, RunningAvgSamplesPerSec=6.337015878739829, CurrSamplesPerSec=5.653619902831238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:06:50,192] [INFO] [timer.py:197:stop] 0/7328, RunningAvgSamplesPerSec=6.337016307579867, CurrSamplesPerSec=5.700846841961118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:07:01,492] [INFO] [timer.py:197:stop] 0/7330, RunningAvgSamplesPerSec=6.337021253894769, CurrSamplesPerSec=5.730574254077691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:07:12,835] [INFO] [timer.py:197:stop] 0/7332, RunningAvgSamplesPerSec=6.337017987007978, CurrSamplesPerSec=5.69427109805156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:07:24,340] [INFO] [timer.py:197:stop] 0/7334, RunningAvgSamplesPerSec=6.33701377064967, CurrSamplesPerSec=5.674490623191548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:07:35,692] [INFO] [timer.py:197:stop] 0/7336, RunningAvgSamplesPerSec=6.3370056649429936, CurrSamplesPerSec=5.662068594283095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:07:47,021] [INFO] [timer.py:197:stop] 0/7338, RunningAvgSamplesPerSec=6.337001658387362, CurrSamplesPerSec=5.669274554579328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:07:58,322] [INFO] [logging.py:68:log_dist] [Rank 0] step=3670, skipped=5, lr=[2.968888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:07:58,324] [INFO] [timer.py:197:stop] 0/7340, RunningAvgSamplesPerSec=6.337001062046699, CurrSamplesPerSec=5.712809943913745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:08:09,665] [INFO] [timer.py:197:stop] 0/7342, RunningAvgSamplesPerSec=6.336995871237044, CurrSamplesPerSec=5.688212609339006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:08:20,929] [INFO] [timer.py:197:stop] 0/7344, RunningAvgSamplesPerSec=6.337002330629014, CurrSamplesPerSec=5.729670084605206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:08:32,215] [INFO] [timer.py:197:stop] 0/7346, RunningAvgSamplesPerSec=6.337005238715324, CurrSamplesPerSec=5.716175782152351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:08:43,581] [INFO] [timer.py:197:stop] 0/7348, RunningAvgSamplesPerSec=6.336986572370934, CurrSamplesPerSec=5.61405030428149, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:08:54,887] [INFO] [timer.py:197:stop] 0/7350, RunningAvgSamplesPerSec=6.336986932123496, CurrSamplesPerSec=5.699781619332736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0005, 'learning_rate': 2.957777777777778e-06, 'epoch': 15.57} +[2022-12-17 09:09:06,198] [INFO] [timer.py:197:stop] 0/7352, RunningAvgSamplesPerSec=6.336985037330815, CurrSamplesPerSec=5.692481289075769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:09:17,522] [INFO] [timer.py:197:stop] 0/7354, RunningAvgSamplesPerSec=6.336985224795389, CurrSamplesPerSec=5.705982438268532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:09:28,837] [INFO] [timer.py:197:stop] 0/7356, RunningAvgSamplesPerSec=6.336984555662266, CurrSamplesPerSec=5.7004093262177955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:09:40,143] [INFO] [timer.py:197:stop] 0/7358, RunningAvgSamplesPerSec=6.336987639653612, CurrSamplesPerSec=5.696448835138831, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:09:51,638] [INFO] [logging.py:68:log_dist] [Rank 0] step=3680, skipped=5, lr=[2.946666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:09:51,639] [INFO] [timer.py:197:stop] 0/7360, RunningAvgSamplesPerSec=6.336993962873559, CurrSamplesPerSec=5.725266829591983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:10:02,925] [INFO] [timer.py:197:stop] 0/7362, RunningAvgSamplesPerSec=6.336998437932194, CurrSamplesPerSec=5.7177191546152475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:10:14,232] [INFO] [timer.py:197:stop] 0/7364, RunningAvgSamplesPerSec=6.337000172759582, CurrSamplesPerSec=5.7215114782360414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:10:25,548] [INFO] [timer.py:197:stop] 0/7366, RunningAvgSamplesPerSec=6.337000912192302, CurrSamplesPerSec=5.706275487250284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:10:36,893] [INFO] [timer.py:197:stop] 0/7368, RunningAvgSamplesPerSec=6.3369977244942906, CurrSamplesPerSec=5.695682534204909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:10:48,213] [INFO] [timer.py:197:stop] 0/7370, RunningAvgSamplesPerSec=6.336995604641772, CurrSamplesPerSec=5.693942564107785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:10:59,505] [INFO] [timer.py:197:stop] 0/7372, RunningAvgSamplesPerSec=6.336996428299475, CurrSamplesPerSec=5.698745588142197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:11:10,854] [INFO] [timer.py:197:stop] 0/7374, RunningAvgSamplesPerSec=6.336987047174027, CurrSamplesPerSec=5.656158488058211, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:11:22,367] [INFO] [timer.py:197:stop] 0/7376, RunningAvgSamplesPerSec=6.336986034032055, CurrSamplesPerSec=5.690582824841769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:11:33,654] [INFO] [timer.py:197:stop] 0/7378, RunningAvgSamplesPerSec=6.336989122120361, CurrSamplesPerSec=5.701948555590245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:11:44,963] [INFO] [logging.py:68:log_dist] [Rank 0] step=3690, skipped=5, lr=[2.9244444444444447e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:11:44,965] [INFO] [timer.py:197:stop] 0/7380, RunningAvgSamplesPerSec=6.336988774998277, CurrSamplesPerSec=5.695121113964519, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:11:56,248] [INFO] [timer.py:197:stop] 0/7382, RunningAvgSamplesPerSec=6.3369942119196905, CurrSamplesPerSec=5.716110052696255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:12:07,531] [INFO] [timer.py:197:stop] 0/7384, RunningAvgSamplesPerSec=6.336997165714012, CurrSamplesPerSec=5.709153235126749, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:12:18,813] [INFO] [timer.py:197:stop] 0/7386, RunningAvgSamplesPerSec=6.337000178406842, CurrSamplesPerSec=5.711433997607642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:12:30,103] [INFO] [timer.py:197:stop] 0/7388, RunningAvgSamplesPerSec=6.337001581801266, CurrSamplesPerSec=5.70525964811784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:12:41,384] [INFO] [timer.py:197:stop] 0/7390, RunningAvgSamplesPerSec=6.337001689966199, CurrSamplesPerSec=5.6848923736187045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:12:52,662] [INFO] [timer.py:197:stop] 0/7392, RunningAvgSamplesPerSec=6.337008670186226, CurrSamplesPerSec=5.735854268715347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:13:03,969] [INFO] [timer.py:197:stop] 0/7394, RunningAvgSamplesPerSec=6.337009304181113, CurrSamplesPerSec=5.689410011081015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:13:15,253] [INFO] [timer.py:197:stop] 0/7396, RunningAvgSamplesPerSec=6.337016834734171, CurrSamplesPerSec=5.726278324108146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:13:26,531] [INFO] [timer.py:197:stop] 0/7398, RunningAvgSamplesPerSec=6.337020752635934, CurrSamplesPerSec=5.7078642491937, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:13:37,856] [INFO] [logging.py:68:log_dist] [Rank 0] step=3700, skipped=5, lr=[2.9022222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:13:37,857] [INFO] [timer.py:197:stop] 0/7400, RunningAvgSamplesPerSec=6.337018429161538, CurrSamplesPerSec=5.684988690399899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.9022222222222223e-06, 'epoch': 15.68} +[2022-12-17 09:13:49,432] [INFO] [timer.py:197:stop] 0/7402, RunningAvgSamplesPerSec=6.337012024744687, CurrSamplesPerSec=5.651153310359503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:14:00,776] [INFO] [timer.py:197:stop] 0/7404, RunningAvgSamplesPerSec=6.33700586855207, CurrSamplesPerSec=5.6845610688138315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:14:12,080] [INFO] [timer.py:197:stop] 0/7406, RunningAvgSamplesPerSec=6.337005576354706, CurrSamplesPerSec=5.698727440987748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:14:23,372] [INFO] [timer.py:197:stop] 0/7408, RunningAvgSamplesPerSec=6.337009761658866, CurrSamplesPerSec=5.705296510834593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:14:34,676] [INFO] [timer.py:197:stop] 0/7410, RunningAvgSamplesPerSec=6.337011033623096, CurrSamplesPerSec=5.712568740747563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:14:46,006] [INFO] [timer.py:197:stop] 0/7412, RunningAvgSamplesPerSec=6.337008244689118, CurrSamplesPerSec=5.688408605366097, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:14:57,348] [INFO] [timer.py:197:stop] 0/7414, RunningAvgSamplesPerSec=6.337007150563991, CurrSamplesPerSec=5.701993853875586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:15:08,690] [INFO] [timer.py:197:stop] 0/7416, RunningAvgSamplesPerSec=6.337000523489746, CurrSamplesPerSec=5.6616062023435445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:15:20,019] [INFO] [timer.py:197:stop] 0/7418, RunningAvgSamplesPerSec=6.336998625385598, CurrSamplesPerSec=5.6962121542639474, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:15:31,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=3710, skipped=5, lr=[2.88e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:15:31,364] [INFO] [timer.py:197:stop] 0/7420, RunningAvgSamplesPerSec=6.336993049210901, CurrSamplesPerSec=5.698096959630452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:15:42,685] [INFO] [timer.py:197:stop] 0/7422, RunningAvgSamplesPerSec=6.336989874096959, CurrSamplesPerSec=5.682819945807793, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:15:54,010] [INFO] [timer.py:197:stop] 0/7424, RunningAvgSamplesPerSec=6.33698803333303, CurrSamplesPerSec=5.687665193832233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:16:05,350] [INFO] [timer.py:197:stop] 0/7426, RunningAvgSamplesPerSec=6.3369833530157305, CurrSamplesPerSec=5.68367689558089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:16:16,687] [INFO] [timer.py:197:stop] 0/7428, RunningAvgSamplesPerSec=6.3369800625802, CurrSamplesPerSec=5.6919600869033085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:16:27,978] [INFO] [timer.py:197:stop] 0/7430, RunningAvgSamplesPerSec=6.336984477677266, CurrSamplesPerSec=5.716490330696599, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:16:39,280] [INFO] [timer.py:197:stop] 0/7432, RunningAvgSamplesPerSec=6.336985455005384, CurrSamplesPerSec=5.721349533270731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:16:50,560] [INFO] [timer.py:197:stop] 0/7434, RunningAvgSamplesPerSec=6.336990721761715, CurrSamplesPerSec=5.732729166972771, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:17:01,850] [INFO] [timer.py:197:stop] 0/7436, RunningAvgSamplesPerSec=6.336993886526433, CurrSamplesPerSec=5.714456358964536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:17:13,123] [INFO] [timer.py:197:stop] 0/7438, RunningAvgSamplesPerSec=6.336996949363353, CurrSamplesPerSec=5.700044740715134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:17:24,419] [INFO] [logging.py:68:log_dist] [Rank 0] step=3720, skipped=5, lr=[2.8577777777777784e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:17:24,420] [INFO] [timer.py:197:stop] 0/7440, RunningAvgSamplesPerSec=6.337000812652505, CurrSamplesPerSec=5.7189904204649755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:17:35,676] [INFO] [timer.py:197:stop] 0/7442, RunningAvgSamplesPerSec=6.337010934063454, CurrSamplesPerSec=5.741173982588318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:17:46,934] [INFO] [timer.py:197:stop] 0/7444, RunningAvgSamplesPerSec=6.337018496823416, CurrSamplesPerSec=5.733557148901786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:17:58,194] [INFO] [timer.py:197:stop] 0/7446, RunningAvgSamplesPerSec=6.337025551190863, CurrSamplesPerSec=5.741582655421268, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:18:09,415] [INFO] [timer.py:197:stop] 0/7448, RunningAvgSamplesPerSec=6.3370357472866425, CurrSamplesPerSec=5.736453169862878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:18:20,674] [INFO] [timer.py:197:stop] 0/7450, RunningAvgSamplesPerSec=6.337047038533585, CurrSamplesPerSec=5.741463535036417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.8466666666666672e-06, 'epoch': 15.78} +[2022-12-17 09:18:31,935] [INFO] [timer.py:197:stop] 0/7452, RunningAvgSamplesPerSec=6.3370536854770965, CurrSamplesPerSec=5.724098962654761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:18:43,232] [INFO] [timer.py:197:stop] 0/7454, RunningAvgSamplesPerSec=6.3370561754818295, CurrSamplesPerSec=5.678187846450097, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:18:54,530] [INFO] [timer.py:197:stop] 0/7456, RunningAvgSamplesPerSec=6.337061318202993, CurrSamplesPerSec=5.726379468785904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:19:05,795] [INFO] [timer.py:197:stop] 0/7458, RunningAvgSamplesPerSec=6.3370713169833355, CurrSamplesPerSec=5.745115777043273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:19:17,067] [INFO] [logging.py:68:log_dist] [Rank 0] step=3730, skipped=5, lr=[2.835555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:19:17,069] [INFO] [timer.py:197:stop] 0/7460, RunningAvgSamplesPerSec=6.337077577755489, CurrSamplesPerSec=5.742890357244891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:19:28,385] [INFO] [timer.py:197:stop] 0/7462, RunningAvgSamplesPerSec=6.337077798063677, CurrSamplesPerSec=5.698841890975853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:19:39,613] [INFO] [timer.py:197:stop] 0/7464, RunningAvgSamplesPerSec=6.3370883772767055, CurrSamplesPerSec=5.72395420281479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:19:50,877] [INFO] [timer.py:197:stop] 0/7466, RunningAvgSamplesPerSec=6.337093881465289, CurrSamplesPerSec=5.739011490489101, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:20:02,139] [INFO] [timer.py:197:stop] 0/7468, RunningAvgSamplesPerSec=6.337099770288439, CurrSamplesPerSec=5.720503373606978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:20:13,435] [INFO] [timer.py:197:stop] 0/7470, RunningAvgSamplesPerSec=6.337101437823006, CurrSamplesPerSec=5.701043467475638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:20:24,750] [INFO] [timer.py:197:stop] 0/7472, RunningAvgSamplesPerSec=6.337101653412334, CurrSamplesPerSec=5.689918926821241, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:20:36,064] [INFO] [timer.py:197:stop] 0/7474, RunningAvgSamplesPerSec=6.337101541849059, CurrSamplesPerSec=5.70705410546408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:20:47,417] [INFO] [timer.py:197:stop] 0/7476, RunningAvgSamplesPerSec=6.337097632975739, CurrSamplesPerSec=5.686130051900902, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:20:58,700] [INFO] [timer.py:197:stop] 0/7478, RunningAvgSamplesPerSec=6.337102461458638, CurrSamplesPerSec=5.719335499291697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:21:09,975] [INFO] [logging.py:68:log_dist] [Rank 0] step=3740, skipped=5, lr=[2.8133333333333336e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:21:09,977] [INFO] [timer.py:197:stop] 0/7480, RunningAvgSamplesPerSec=6.337110095747103, CurrSamplesPerSec=5.7376940261095575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:21:21,284] [INFO] [timer.py:197:stop] 0/7482, RunningAvgSamplesPerSec=6.3371124749097545, CurrSamplesPerSec=5.720568716500787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:21:32,583] [INFO] [timer.py:197:stop] 0/7484, RunningAvgSamplesPerSec=6.33711631173176, CurrSamplesPerSec=5.715392239377726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:21:43,871] [INFO] [timer.py:197:stop] 0/7486, RunningAvgSamplesPerSec=6.337121037137045, CurrSamplesPerSec=5.723318371519071, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:21:55,141] [INFO] [timer.py:197:stop] 0/7488, RunningAvgSamplesPerSec=6.3371289155506805, CurrSamplesPerSec=5.741047266531711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:22:06,422] [INFO] [timer.py:197:stop] 0/7490, RunningAvgSamplesPerSec=6.337135404090728, CurrSamplesPerSec=5.734627439020378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:22:17,699] [INFO] [timer.py:197:stop] 0/7492, RunningAvgSamplesPerSec=6.3371398018959875, CurrSamplesPerSec=5.733295577748234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:22:28,964] [INFO] [timer.py:197:stop] 0/7494, RunningAvgSamplesPerSec=6.337148466188466, CurrSamplesPerSec=5.7207518308463285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:22:40,250] [INFO] [timer.py:197:stop] 0/7496, RunningAvgSamplesPerSec=6.337152944739803, CurrSamplesPerSec=5.720642350953582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:22:51,556] [INFO] [timer.py:197:stop] 0/7498, RunningAvgSamplesPerSec=6.337156582906381, CurrSamplesPerSec=5.714792374357171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:23:02,854] [INFO] [logging.py:68:log_dist] [Rank 0] step=3750, skipped=5, lr=[2.7911111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:23:02,856] [INFO] [timer.py:197:stop] 0/7500, RunningAvgSamplesPerSec=6.337159180592687, CurrSamplesPerSec=5.709706009399266, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.7911111111111113e-06, 'epoch': 15.89} +[2022-12-17 09:23:14,199] [INFO] [timer.py:197:stop] 0/7502, RunningAvgSamplesPerSec=6.3371583100622, CurrSamplesPerSec=5.702817826897301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:23:25,538] [INFO] [timer.py:197:stop] 0/7504, RunningAvgSamplesPerSec=6.337153460940718, CurrSamplesPerSec=5.684490286311387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:23:36,852] [INFO] [timer.py:197:stop] 0/7506, RunningAvgSamplesPerSec=6.337152546984216, CurrSamplesPerSec=5.708428427570143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:23:48,184] [INFO] [timer.py:197:stop] 0/7508, RunningAvgSamplesPerSec=6.337149343188377, CurrSamplesPerSec=5.678220036147123, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:23:59,494] [INFO] [timer.py:197:stop] 0/7510, RunningAvgSamplesPerSec=6.3371502493584835, CurrSamplesPerSec=5.700620690961581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:24:10,828] [INFO] [timer.py:197:stop] 0/7512, RunningAvgSamplesPerSec=6.337148006115891, CurrSamplesPerSec=5.699207532626838, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:24:22,127] [INFO] [timer.py:197:stop] 0/7514, RunningAvgSamplesPerSec=6.337147584269954, CurrSamplesPerSec=5.698130826917132, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:24:33,498] [INFO] [timer.py:197:stop] 0/7516, RunningAvgSamplesPerSec=6.337147803798282, CurrSamplesPerSec=5.695418848667418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:24:44,813] [INFO] [timer.py:197:stop] 0/7518, RunningAvgSamplesPerSec=6.337146998144934, CurrSamplesPerSec=5.694066967722256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:24:56,096] [INFO] [logging.py:68:log_dist] [Rank 0] step=3760, skipped=5, lr=[2.7688888888888893e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:24:56,098] [INFO] [timer.py:197:stop] 0/7520, RunningAvgSamplesPerSec=6.33715050800144, CurrSamplesPerSec=5.697948432141898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:25:07,381] [INFO] [timer.py:197:stop] 0/7522, RunningAvgSamplesPerSec=6.337155380512161, CurrSamplesPerSec=5.716553147168683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:25:18,691] [INFO] [timer.py:197:stop] 0/7524, RunningAvgSamplesPerSec=6.337157343363257, CurrSamplesPerSec=5.6871434272953865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:25:30,005] [INFO] [timer.py:197:stop] 0/7526, RunningAvgSamplesPerSec=6.337156740513725, CurrSamplesPerSec=5.693342604366141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:25:41,332] [INFO] [timer.py:197:stop] 0/7528, RunningAvgSamplesPerSec=6.337153983110774, CurrSamplesPerSec=5.688243225294438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:25:52,613] [INFO] [timer.py:197:stop] 0/7530, RunningAvgSamplesPerSec=6.337159190948649, CurrSamplesPerSec=5.726155440575434, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:26:04,062] [INFO] [timer.py:197:stop] 0/7532, RunningAvgSamplesPerSec=6.337157782718983, CurrSamplesPerSec=5.69249263636342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:26:15,334] [INFO] [timer.py:197:stop] 0/7534, RunningAvgSamplesPerSec=6.337161263495991, CurrSamplesPerSec=5.708837065279713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:26:26,628] [INFO] [timer.py:197:stop] 0/7536, RunningAvgSamplesPerSec=6.337161522577037, CurrSamplesPerSec=5.694562462074853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:26:37,945] [INFO] [timer.py:197:stop] 0/7538, RunningAvgSamplesPerSec=6.33716089579459, CurrSamplesPerSec=5.689047554579456, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:26:49,241] [INFO] [logging.py:68:log_dist] [Rank 0] step=3770, skipped=5, lr=[2.746666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:26:49,242] [INFO] [timer.py:197:stop] 0/7540, RunningAvgSamplesPerSec=6.33716380996003, CurrSamplesPerSec=5.709197919404089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:27:00,533] [INFO] [timer.py:197:stop] 0/7542, RunningAvgSamplesPerSec=6.337165489777445, CurrSamplesPerSec=5.713306516974537, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:27:11,827] [INFO] [timer.py:197:stop] 0/7544, RunningAvgSamplesPerSec=6.337169139932689, CurrSamplesPerSec=5.712809457596547, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:27:23,133] [INFO] [timer.py:197:stop] 0/7546, RunningAvgSamplesPerSec=6.337170620485615, CurrSamplesPerSec=5.73317018880441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:27:34,433] [INFO] [timer.py:197:stop] 0/7548, RunningAvgSamplesPerSec=6.337173591365556, CurrSamplesPerSec=5.7136450726788635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:27:45,742] [INFO] [timer.py:197:stop] 0/7550, RunningAvgSamplesPerSec=6.33717364908794, CurrSamplesPerSec=5.71643944549684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.7355555555555557e-06, 'epoch': 16.0} +[2022-12-17 09:27:54,445] [INFO] [timer.py:197:stop] 0/7552, RunningAvgSamplesPerSec=6.337586792581298, CurrSamplesPerSec=10.26715855822542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:28:05,728] [INFO] [timer.py:197:stop] 0/7554, RunningAvgSamplesPerSec=6.337591516051901, CurrSamplesPerSec=5.717267355524845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:28:17,013] [INFO] [timer.py:197:stop] 0/7556, RunningAvgSamplesPerSec=6.337596250854084, CurrSamplesPerSec=5.703340534797298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:28:28,297] [INFO] [timer.py:197:stop] 0/7558, RunningAvgSamplesPerSec=6.337600728695917, CurrSamplesPerSec=5.724571863274335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:28:39,600] [INFO] [logging.py:68:log_dist] [Rank 0] step=3780, skipped=5, lr=[2.7244444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:28:39,601] [INFO] [timer.py:197:stop] 0/7560, RunningAvgSamplesPerSec=6.337601870922737, CurrSamplesPerSec=5.703217906724676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:28:50,900] [INFO] [timer.py:197:stop] 0/7562, RunningAvgSamplesPerSec=6.337602270427081, CurrSamplesPerSec=5.701695431361674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:29:02,201] [INFO] [timer.py:197:stop] 0/7564, RunningAvgSamplesPerSec=6.337604562714723, CurrSamplesPerSec=5.7021777190112815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:29:13,539] [INFO] [timer.py:197:stop] 0/7566, RunningAvgSamplesPerSec=6.3376015213372705, CurrSamplesPerSec=5.695463801578938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:29:24,829] [INFO] [timer.py:197:stop] 0/7568, RunningAvgSamplesPerSec=6.337605068679528, CurrSamplesPerSec=5.717699424969985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:29:36,142] [INFO] [timer.py:197:stop] 0/7570, RunningAvgSamplesPerSec=6.3376098008097355, CurrSamplesPerSec=5.752060729281249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:29:47,468] [INFO] [timer.py:197:stop] 0/7572, RunningAvgSamplesPerSec=6.337607647820765, CurrSamplesPerSec=5.700316601843686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:29:58,780] [INFO] [timer.py:197:stop] 0/7574, RunningAvgSamplesPerSec=6.337607350186396, CurrSamplesPerSec=5.697775966126229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:30:10,113] [INFO] [timer.py:197:stop] 0/7576, RunningAvgSamplesPerSec=6.337607612379269, CurrSamplesPerSec=5.7026789873583805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:30:21,506] [INFO] [timer.py:197:stop] 0/7578, RunningAvgSamplesPerSec=6.337595988367939, CurrSamplesPerSec=5.628938079626455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:30:32,852] [INFO] [logging.py:68:log_dist] [Rank 0] step=3790, skipped=5, lr=[2.702222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:30:32,854] [INFO] [timer.py:197:stop] 0/7580, RunningAvgSamplesPerSec=6.337594479460954, CurrSamplesPerSec=5.707124965658659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:30:44,120] [INFO] [timer.py:197:stop] 0/7582, RunningAvgSamplesPerSec=6.337595931550774, CurrSamplesPerSec=5.7019829531529265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:30:55,441] [INFO] [timer.py:197:stop] 0/7584, RunningAvgSamplesPerSec=6.337594274854377, CurrSamplesPerSec=5.700950238170126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:31:06,750] [INFO] [timer.py:197:stop] 0/7586, RunningAvgSamplesPerSec=6.337596159526293, CurrSamplesPerSec=5.719249225680997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:31:18,069] [INFO] [timer.py:197:stop] 0/7588, RunningAvgSamplesPerSec=6.337597267773341, CurrSamplesPerSec=5.703781651700679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:31:29,385] [INFO] [timer.py:197:stop] 0/7590, RunningAvgSamplesPerSec=6.33759872193673, CurrSamplesPerSec=5.701558584081337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:31:40,639] [INFO] [timer.py:197:stop] 0/7592, RunningAvgSamplesPerSec=6.337606926848249, CurrSamplesPerSec=5.742255717046693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:31:51,898] [INFO] [timer.py:197:stop] 0/7594, RunningAvgSamplesPerSec=6.337610414269546, CurrSamplesPerSec=5.730849769348819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:32:03,166] [INFO] [timer.py:197:stop] 0/7596, RunningAvgSamplesPerSec=6.3376154535103, CurrSamplesPerSec=5.706872837563713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:32:14,441] [INFO] [timer.py:197:stop] 0/7598, RunningAvgSamplesPerSec=6.337619653048147, CurrSamplesPerSec=5.705567660454909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:32:25,749] [INFO] [logging.py:68:log_dist] [Rank 0] step=3800, skipped=5, lr=[2.68e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:32:25,750] [INFO] [timer.py:197:stop] 0/7600, RunningAvgSamplesPerSec=6.337619378832625, CurrSamplesPerSec=5.702209454509983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.68e-06, 'epoch': 16.1} +[2022-12-17 09:32:37,105] [INFO] [timer.py:197:stop] 0/7602, RunningAvgSamplesPerSec=6.337616036998575, CurrSamplesPerSec=5.679462020335036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:32:48,383] [INFO] [timer.py:197:stop] 0/7604, RunningAvgSamplesPerSec=6.337618781863973, CurrSamplesPerSec=5.723297138899976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:32:59,664] [INFO] [timer.py:197:stop] 0/7606, RunningAvgSamplesPerSec=6.337624220363036, CurrSamplesPerSec=5.727346386999702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:33:10,968] [INFO] [timer.py:197:stop] 0/7608, RunningAvgSamplesPerSec=6.337626727971518, CurrSamplesPerSec=5.71430113854971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:33:22,282] [INFO] [timer.py:197:stop] 0/7610, RunningAvgSamplesPerSec=6.337628377283658, CurrSamplesPerSec=5.720003598608851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:33:33,565] [INFO] [timer.py:197:stop] 0/7612, RunningAvgSamplesPerSec=6.337630500529432, CurrSamplesPerSec=5.716670992646742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:33:44,873] [INFO] [timer.py:197:stop] 0/7614, RunningAvgSamplesPerSec=6.337631753825161, CurrSamplesPerSec=5.704467699806805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:33:56,157] [INFO] [timer.py:197:stop] 0/7616, RunningAvgSamplesPerSec=6.337635793398005, CurrSamplesPerSec=5.718958497903887, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:34:07,617] [INFO] [timer.py:197:stop] 0/7618, RunningAvgSamplesPerSec=6.337639489002845, CurrSamplesPerSec=5.716124902552305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:34:18,908] [INFO] [logging.py:68:log_dist] [Rank 0] step=3810, skipped=5, lr=[2.6577777777777782e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:34:18,910] [INFO] [timer.py:197:stop] 0/7620, RunningAvgSamplesPerSec=6.337641366147274, CurrSamplesPerSec=5.704450001085912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:34:30,171] [INFO] [timer.py:197:stop] 0/7622, RunningAvgSamplesPerSec=6.337645186424332, CurrSamplesPerSec=5.700668631537744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:34:41,463] [INFO] [timer.py:197:stop] 0/7624, RunningAvgSamplesPerSec=6.337648983931281, CurrSamplesPerSec=5.7186176066698735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:34:52,784] [INFO] [timer.py:197:stop] 0/7626, RunningAvgSamplesPerSec=6.337647700728763, CurrSamplesPerSec=5.692582691973853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:35:04,083] [INFO] [timer.py:197:stop] 0/7628, RunningAvgSamplesPerSec=6.337648358748499, CurrSamplesPerSec=5.692907929928287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:35:15,386] [INFO] [timer.py:197:stop] 0/7630, RunningAvgSamplesPerSec=6.337647574349222, CurrSamplesPerSec=5.688455617491631, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:35:26,690] [INFO] [timer.py:197:stop] 0/7632, RunningAvgSamplesPerSec=6.337649406499528, CurrSamplesPerSec=5.729970708860434, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:35:37,991] [INFO] [timer.py:197:stop] 0/7634, RunningAvgSamplesPerSec=6.337650791547396, CurrSamplesPerSec=5.705103471891794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:35:49,289] [INFO] [timer.py:197:stop] 0/7636, RunningAvgSamplesPerSec=6.3376497688644635, CurrSamplesPerSec=5.694385610703284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:36:00,627] [INFO] [timer.py:197:stop] 0/7638, RunningAvgSamplesPerSec=6.337650054425621, CurrSamplesPerSec=5.708650342395094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:36:11,924] [INFO] [logging.py:68:log_dist] [Rank 0] step=3820, skipped=5, lr=[2.635555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:36:11,926] [INFO] [timer.py:197:stop] 0/7640, RunningAvgSamplesPerSec=6.337651283414945, CurrSamplesPerSec=5.7059324676879415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:36:23,223] [INFO] [timer.py:197:stop] 0/7642, RunningAvgSamplesPerSec=6.337653160494952, CurrSamplesPerSec=5.708989803483139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:36:34,508] [INFO] [timer.py:197:stop] 0/7644, RunningAvgSamplesPerSec=6.337658097402001, CurrSamplesPerSec=5.720769874705262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:36:45,822] [INFO] [timer.py:197:stop] 0/7646, RunningAvgSamplesPerSec=6.337658170056963, CurrSamplesPerSec=5.7108541594545805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:36:57,126] [INFO] [timer.py:197:stop] 0/7648, RunningAvgSamplesPerSec=6.337659849422356, CurrSamplesPerSec=5.709940412176869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:37:08,413] [INFO] [timer.py:197:stop] 0/7650, RunningAvgSamplesPerSec=6.337664868797306, CurrSamplesPerSec=5.725524981982122, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.6244444444444446e-06, 'epoch': 16.21} +[2022-12-17 09:37:19,685] [INFO] [timer.py:197:stop] 0/7652, RunningAvgSamplesPerSec=6.337667092967805, CurrSamplesPerSec=5.708157248742415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:37:31,018] [INFO] [timer.py:197:stop] 0/7654, RunningAvgSamplesPerSec=6.3376648265658115, CurrSamplesPerSec=5.675200838364229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:37:42,363] [INFO] [timer.py:197:stop] 0/7656, RunningAvgSamplesPerSec=6.337659805455543, CurrSamplesPerSec=5.673781065449453, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:37:53,671] [INFO] [timer.py:197:stop] 0/7658, RunningAvgSamplesPerSec=6.33765906744692, CurrSamplesPerSec=5.708310436122384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:38:04,934] [INFO] [logging.py:68:log_dist] [Rank 0] step=3830, skipped=5, lr=[2.6133333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:38:04,934] [INFO] [timer.py:197:stop] 0/7660, RunningAvgSamplesPerSec=6.337663550102813, CurrSamplesPerSec=5.717441247099751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:38:16,207] [INFO] [timer.py:197:stop] 0/7662, RunningAvgSamplesPerSec=6.3376692239885655, CurrSamplesPerSec=5.717891855717364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:38:27,511] [INFO] [timer.py:197:stop] 0/7664, RunningAvgSamplesPerSec=6.337673040340945, CurrSamplesPerSec=5.71155503484099, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:38:38,957] [INFO] [timer.py:197:stop] 0/7666, RunningAvgSamplesPerSec=6.337682342427288, CurrSamplesPerSec=5.738175554205899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:38:50,284] [INFO] [timer.py:197:stop] 0/7668, RunningAvgSamplesPerSec=6.3376767349966565, CurrSamplesPerSec=5.668561273086992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:39:01,591] [INFO] [timer.py:197:stop] 0/7670, RunningAvgSamplesPerSec=6.337678125002861, CurrSamplesPerSec=5.710234110798286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:39:12,845] [INFO] [timer.py:197:stop] 0/7672, RunningAvgSamplesPerSec=6.337686190629664, CurrSamplesPerSec=5.736208004868038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:39:24,113] [INFO] [timer.py:197:stop] 0/7674, RunningAvgSamplesPerSec=6.337692827180317, CurrSamplesPerSec=5.722016639575232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:39:35,394] [INFO] [timer.py:197:stop] 0/7676, RunningAvgSamplesPerSec=6.337697913741528, CurrSamplesPerSec=5.71689793192338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:39:46,712] [INFO] [timer.py:197:stop] 0/7678, RunningAvgSamplesPerSec=6.337697349513364, CurrSamplesPerSec=5.699035474357622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:39:57,972] [INFO] [logging.py:68:log_dist] [Rank 0] step=3840, skipped=5, lr=[2.5911111111111115e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:39:57,974] [INFO] [timer.py:197:stop] 0/7680, RunningAvgSamplesPerSec=6.3377044241616645, CurrSamplesPerSec=5.7285742595547315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:40:09,260] [INFO] [timer.py:197:stop] 0/7682, RunningAvgSamplesPerSec=6.337711552344175, CurrSamplesPerSec=5.7456888196643385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:40:20,570] [INFO] [timer.py:197:stop] 0/7684, RunningAvgSamplesPerSec=6.337709510302982, CurrSamplesPerSec=5.69719889770791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:40:32,111] [INFO] [timer.py:197:stop] 0/7686, RunningAvgSamplesPerSec=6.337710909695745, CurrSamplesPerSec=5.723016492646554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:40:43,391] [INFO] [timer.py:197:stop] 0/7688, RunningAvgSamplesPerSec=6.337714796469835, CurrSamplesPerSec=5.713155979588083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:40:54,674] [INFO] [timer.py:197:stop] 0/7690, RunningAvgSamplesPerSec=6.33771834844587, CurrSamplesPerSec=5.721193206541384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:41:05,945] [INFO] [timer.py:197:stop] 0/7692, RunningAvgSamplesPerSec=6.337724939606327, CurrSamplesPerSec=5.731976574601302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:41:17,229] [INFO] [timer.py:197:stop] 0/7694, RunningAvgSamplesPerSec=6.337727410771224, CurrSamplesPerSec=5.7024446958230435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:41:28,486] [INFO] [timer.py:197:stop] 0/7696, RunningAvgSamplesPerSec=6.33772906309757, CurrSamplesPerSec=5.68873963547455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:41:39,728] [INFO] [timer.py:197:stop] 0/7698, RunningAvgSamplesPerSec=6.3377401774351805, CurrSamplesPerSec=5.753686191763893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:41:50,987] [INFO] [logging.py:68:log_dist] [Rank 0] step=3850, skipped=5, lr=[2.568888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:41:50,988] [INFO] [timer.py:197:stop] 0/7700, RunningAvgSamplesPerSec=6.337747795509742, CurrSamplesPerSec=5.752716277848127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.568888888888889e-06, 'epoch': 16.31} +[2022-12-17 09:42:02,271] [INFO] [timer.py:197:stop] 0/7702, RunningAvgSamplesPerSec=6.337754141135462, CurrSamplesPerSec=5.726597894844452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:42:13,528] [INFO] [timer.py:197:stop] 0/7704, RunningAvgSamplesPerSec=6.337759938911315, CurrSamplesPerSec=5.719015032759193, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:42:24,801] [INFO] [timer.py:197:stop] 0/7706, RunningAvgSamplesPerSec=6.3377667375846105, CurrSamplesPerSec=5.7230045352871075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:42:36,099] [INFO] [timer.py:197:stop] 0/7708, RunningAvgSamplesPerSec=6.3377724764819705, CurrSamplesPerSec=5.71709298748041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:42:47,308] [INFO] [timer.py:197:stop] 0/7710, RunningAvgSamplesPerSec=6.337785431762316, CurrSamplesPerSec=5.740814968288539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:42:58,557] [INFO] [timer.py:197:stop] 0/7712, RunningAvgSamplesPerSec=6.337794374665492, CurrSamplesPerSec=5.733962778338685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:43:09,834] [INFO] [timer.py:197:stop] 0/7714, RunningAvgSamplesPerSec=6.3377989705497155, CurrSamplesPerSec=5.716045541922188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:43:21,082] [INFO] [timer.py:197:stop] 0/7716, RunningAvgSamplesPerSec=6.337809588204668, CurrSamplesPerSec=5.740360248559839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:43:32,353] [INFO] [timer.py:197:stop] 0/7718, RunningAvgSamplesPerSec=6.337817725751959, CurrSamplesPerSec=5.728971114518445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:43:43,607] [INFO] [logging.py:68:log_dist] [Rank 0] step=3860, skipped=5, lr=[2.5466666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:43:43,609] [INFO] [timer.py:197:stop] 0/7720, RunningAvgSamplesPerSec=6.337825494644621, CurrSamplesPerSec=5.721367824787967, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:43:54,934] [INFO] [timer.py:197:stop] 0/7722, RunningAvgSamplesPerSec=6.337822737480995, CurrSamplesPerSec=5.683201341972973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:44:06,208] [INFO] [timer.py:197:stop] 0/7724, RunningAvgSamplesPerSec=6.337829661991261, CurrSamplesPerSec=5.716317227330523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:44:17,509] [INFO] [timer.py:197:stop] 0/7726, RunningAvgSamplesPerSec=6.337831393646679, CurrSamplesPerSec=5.712366943264196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:44:28,735] [INFO] [timer.py:197:stop] 0/7728, RunningAvgSamplesPerSec=6.337842682733193, CurrSamplesPerSec=5.748709634408516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:44:40,031] [INFO] [timer.py:197:stop] 0/7730, RunningAvgSamplesPerSec=6.337845932926097, CurrSamplesPerSec=5.708473100502343, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:44:51,331] [INFO] [timer.py:197:stop] 0/7732, RunningAvgSamplesPerSec=6.337849432167574, CurrSamplesPerSec=5.712740158242432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:45:02,541] [INFO] [timer.py:197:stop] 0/7734, RunningAvgSamplesPerSec=6.337862880216422, CurrSamplesPerSec=5.74519471732774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:45:13,775] [INFO] [timer.py:197:stop] 0/7736, RunningAvgSamplesPerSec=6.337872213835417, CurrSamplesPerSec=5.744988641020949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:45:25,099] [INFO] [timer.py:197:stop] 0/7738, RunningAvgSamplesPerSec=6.337871356149751, CurrSamplesPerSec=5.6864791268958745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:45:36,327] [INFO] [logging.py:68:log_dist] [Rank 0] step=3870, skipped=5, lr=[2.5244444444444447e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:45:36,328] [INFO] [timer.py:197:stop] 0/7740, RunningAvgSamplesPerSec=6.337880462359573, CurrSamplesPerSec=5.733127822295487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:45:47,545] [INFO] [timer.py:197:stop] 0/7742, RunningAvgSamplesPerSec=6.337888450710517, CurrSamplesPerSec=5.737256477188804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:45:58,848] [INFO] [timer.py:197:stop] 0/7744, RunningAvgSamplesPerSec=6.337887695526293, CurrSamplesPerSec=5.702028251984804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:46:10,119] [INFO] [timer.py:197:stop] 0/7746, RunningAvgSamplesPerSec=6.33789452116026, CurrSamplesPerSec=5.7230445560333365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:46:21,408] [INFO] [timer.py:197:stop] 0/7748, RunningAvgSamplesPerSec=6.337898703797568, CurrSamplesPerSec=5.709751916821519, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:46:32,706] [INFO] [timer.py:197:stop] 0/7750, RunningAvgSamplesPerSec=6.337900434978689, CurrSamplesPerSec=5.708300482334463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.5133333333333336e-06, 'epoch': 16.42} +[2022-12-17 09:46:43,980] [INFO] [timer.py:197:stop] 0/7752, RunningAvgSamplesPerSec=6.337906537766826, CurrSamplesPerSec=5.7340281841099365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:46:55,267] [INFO] [timer.py:197:stop] 0/7754, RunningAvgSamplesPerSec=6.337911639304656, CurrSamplesPerSec=5.730378032888709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:47:06,521] [INFO] [timer.py:197:stop] 0/7756, RunningAvgSamplesPerSec=6.337919004617038, CurrSamplesPerSec=5.733004154853874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:47:17,798] [INFO] [timer.py:197:stop] 0/7758, RunningAvgSamplesPerSec=6.337926206357161, CurrSamplesPerSec=5.724206621853158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:47:29,117] [INFO] [logging.py:68:log_dist] [Rank 0] step=3880, skipped=5, lr=[2.5022222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:47:29,119] [INFO] [timer.py:197:stop] 0/7760, RunningAvgSamplesPerSec=6.337930726765982, CurrSamplesPerSec=5.728880882035514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:47:40,387] [INFO] [timer.py:197:stop] 0/7762, RunningAvgSamplesPerSec=6.337939116628576, CurrSamplesPerSec=5.725962453227796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:47:51,660] [INFO] [timer.py:197:stop] 0/7764, RunningAvgSamplesPerSec=6.33794288021509, CurrSamplesPerSec=5.707184178951615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:48:02,949] [INFO] [timer.py:197:stop] 0/7766, RunningAvgSamplesPerSec=6.337948337593651, CurrSamplesPerSec=5.722645837736495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:48:14,247] [INFO] [timer.py:197:stop] 0/7768, RunningAvgSamplesPerSec=6.337953765039836, CurrSamplesPerSec=5.714051538449913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:48:25,521] [INFO] [timer.py:197:stop] 0/7770, RunningAvgSamplesPerSec=6.33796009831342, CurrSamplesPerSec=5.717910855911447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:48:36,804] [INFO] [timer.py:197:stop] 0/7772, RunningAvgSamplesPerSec=6.337966152970789, CurrSamplesPerSec=5.726004713901616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:48:48,058] [INFO] [timer.py:197:stop] 0/7774, RunningAvgSamplesPerSec=6.337976220513363, CurrSamplesPerSec=5.741714553242598, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:48:59,319] [INFO] [timer.py:197:stop] 0/7776, RunningAvgSamplesPerSec=6.337985161512416, CurrSamplesPerSec=5.738442723091509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:49:10,550] [INFO] [timer.py:197:stop] 0/7778, RunningAvgSamplesPerSec=6.337993629409863, CurrSamplesPerSec=5.737967528077485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:49:21,821] [INFO] [logging.py:68:log_dist] [Rank 0] step=3890, skipped=5, lr=[2.4800000000000004e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:49:21,822] [INFO] [timer.py:197:stop] 0/7780, RunningAvgSamplesPerSec=6.33799976467172, CurrSamplesPerSec=5.717746922494803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:49:33,124] [INFO] [timer.py:197:stop] 0/7782, RunningAvgSamplesPerSec=6.338001283847643, CurrSamplesPerSec=5.689798322443387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:49:44,402] [INFO] [timer.py:197:stop] 0/7784, RunningAvgSamplesPerSec=6.33800723289743, CurrSamplesPerSec=5.7221383697188966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:49:55,649] [INFO] [timer.py:197:stop] 0/7786, RunningAvgSamplesPerSec=6.3380153543891415, CurrSamplesPerSec=5.725017003230027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:50:06,914] [INFO] [timer.py:197:stop] 0/7788, RunningAvgSamplesPerSec=6.3380232131434076, CurrSamplesPerSec=5.736539963293558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:50:18,235] [INFO] [timer.py:197:stop] 0/7790, RunningAvgSamplesPerSec=6.338021456688827, CurrSamplesPerSec=5.689594512665814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:50:29,538] [INFO] [timer.py:197:stop] 0/7792, RunningAvgSamplesPerSec=6.338022973657751, CurrSamplesPerSec=5.690050391416976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:50:40,864] [INFO] [timer.py:197:stop] 0/7794, RunningAvgSamplesPerSec=6.338024076398991, CurrSamplesPerSec=5.691871257887523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:50:52,147] [INFO] [timer.py:197:stop] 0/7796, RunningAvgSamplesPerSec=6.338026648126305, CurrSamplesPerSec=5.698377344512056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:51:03,459] [INFO] [timer.py:197:stop] 0/7798, RunningAvgSamplesPerSec=6.33802682372121, CurrSamplesPerSec=5.704365145730466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:51:14,724] [INFO] [logging.py:68:log_dist] [Rank 0] step=3900, skipped=5, lr=[2.457777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:51:14,726] [INFO] [timer.py:197:stop] 0/7800, RunningAvgSamplesPerSec=6.338033175561862, CurrSamplesPerSec=5.723243447922379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.457777777777778e-06, 'epoch': 16.53} +[2022-12-17 09:51:26,011] [INFO] [timer.py:197:stop] 0/7802, RunningAvgSamplesPerSec=6.338037345556911, CurrSamplesPerSec=5.70654624468292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:51:37,311] [INFO] [timer.py:197:stop] 0/7804, RunningAvgSamplesPerSec=6.338038981952448, CurrSamplesPerSec=5.70911413695785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:51:48,585] [INFO] [timer.py:197:stop] 0/7806, RunningAvgSamplesPerSec=6.3380445896973985, CurrSamplesPerSec=5.721638552918464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:51:59,954] [INFO] [timer.py:197:stop] 0/7808, RunningAvgSamplesPerSec=6.338035527632987, CurrSamplesPerSec=5.628274561626016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:52:11,222] [INFO] [timer.py:197:stop] 0/7810, RunningAvgSamplesPerSec=6.3380415482884045, CurrSamplesPerSec=5.719435667681271, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:52:22,498] [INFO] [timer.py:197:stop] 0/7812, RunningAvgSamplesPerSec=6.338044134123395, CurrSamplesPerSec=5.724492512077532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:52:33,735] [INFO] [timer.py:197:stop] 0/7814, RunningAvgSamplesPerSec=6.338052879421057, CurrSamplesPerSec=5.733898598845058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:52:44,969] [INFO] [timer.py:197:stop] 0/7816, RunningAvgSamplesPerSec=6.338062080626535, CurrSamplesPerSec=5.728037382499521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:52:56,251] [INFO] [timer.py:197:stop] 0/7818, RunningAvgSamplesPerSec=6.33806568143163, CurrSamplesPerSec=5.7105181210758404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:53:07,526] [INFO] [logging.py:68:log_dist] [Rank 0] step=3910, skipped=5, lr=[2.4355555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:53:07,528] [INFO] [timer.py:197:stop] 0/7820, RunningAvgSamplesPerSec=6.33807126366113, CurrSamplesPerSec=5.728249334232319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:53:18,806] [INFO] [timer.py:197:stop] 0/7822, RunningAvgSamplesPerSec=6.338075961797504, CurrSamplesPerSec=5.7384353627338704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:53:30,101] [INFO] [timer.py:197:stop] 0/7824, RunningAvgSamplesPerSec=6.338077998092185, CurrSamplesPerSec=5.723391344595013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:53:41,400] [INFO] [timer.py:197:stop] 0/7826, RunningAvgSamplesPerSec=6.338081091758629, CurrSamplesPerSec=5.7249855017470415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:53:52,680] [INFO] [timer.py:197:stop] 0/7828, RunningAvgSamplesPerSec=6.338085376762099, CurrSamplesPerSec=5.725090752243677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:54:03,987] [INFO] [timer.py:197:stop] 0/7830, RunningAvgSamplesPerSec=6.33808473099834, CurrSamplesPerSec=5.700445399961691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:54:15,313] [INFO] [timer.py:197:stop] 0/7832, RunningAvgSamplesPerSec=6.338078814876594, CurrSamplesPerSec=5.667699780297962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:54:26,688] [INFO] [timer.py:197:stop] 0/7834, RunningAvgSamplesPerSec=6.338073448259471, CurrSamplesPerSec=5.684280356739311, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:54:38,001] [INFO] [timer.py:197:stop] 0/7836, RunningAvgSamplesPerSec=6.338072425133514, CurrSamplesPerSec=5.704623356301539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:54:49,313] [INFO] [timer.py:197:stop] 0/7838, RunningAvgSamplesPerSec=6.338070988951457, CurrSamplesPerSec=5.680800482440264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:55:00,628] [INFO] [logging.py:68:log_dist] [Rank 0] step=3920, skipped=5, lr=[2.4133333333333337e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:55:00,630] [INFO] [timer.py:197:stop] 0/7840, RunningAvgSamplesPerSec=6.338069360016892, CurrSamplesPerSec=5.7083890965455835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:55:11,939] [INFO] [timer.py:197:stop] 0/7842, RunningAvgSamplesPerSec=6.338068903567028, CurrSamplesPerSec=5.700775169038363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:55:23,211] [INFO] [timer.py:197:stop] 0/7844, RunningAvgSamplesPerSec=6.338071817909554, CurrSamplesPerSec=5.725986392723651, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:55:34,467] [INFO] [timer.py:197:stop] 0/7846, RunningAvgSamplesPerSec=6.338080453163421, CurrSamplesPerSec=5.7490021637294015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:55:45,759] [INFO] [timer.py:197:stop] 0/7848, RunningAvgSamplesPerSec=6.338083687906425, CurrSamplesPerSec=5.71646500971841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:55:57,056] [INFO] [timer.py:197:stop] 0/7850, RunningAvgSamplesPerSec=6.338086576282932, CurrSamplesPerSec=5.7301632325805985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.4022222222222225e-06, 'epoch': 16.63} +[2022-12-17 09:56:08,361] [INFO] [timer.py:197:stop] 0/7852, RunningAvgSamplesPerSec=6.33808753398256, CurrSamplesPerSec=5.713277819413265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:56:19,624] [INFO] [timer.py:197:stop] 0/7854, RunningAvgSamplesPerSec=6.33809495047022, CurrSamplesPerSec=5.737719290220738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:56:30,898] [INFO] [timer.py:197:stop] 0/7856, RunningAvgSamplesPerSec=6.338100536942322, CurrSamplesPerSec=5.726581524528675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:56:42,177] [INFO] [timer.py:197:stop] 0/7858, RunningAvgSamplesPerSec=6.338105385584027, CurrSamplesPerSec=5.7167106813704445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:56:53,435] [INFO] [logging.py:68:log_dist] [Rank 0] step=3930, skipped=5, lr=[2.3911111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:56:53,437] [INFO] [timer.py:197:stop] 0/7860, RunningAvgSamplesPerSec=6.338108012024771, CurrSamplesPerSec=5.7049114159581595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:57:04,775] [INFO] [timer.py:197:stop] 0/7862, RunningAvgSamplesPerSec=6.338111543072177, CurrSamplesPerSec=5.713287060969322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:57:16,102] [INFO] [timer.py:197:stop] 0/7864, RunningAvgSamplesPerSec=6.338110406074132, CurrSamplesPerSec=5.695581504230151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:57:27,393] [INFO] [timer.py:197:stop] 0/7866, RunningAvgSamplesPerSec=6.338113753744858, CurrSamplesPerSec=5.711950993677978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:57:38,762] [INFO] [timer.py:197:stop] 0/7868, RunningAvgSamplesPerSec=6.338113012863938, CurrSamplesPerSec=5.679170036729898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:57:50,071] [INFO] [timer.py:197:stop] 0/7870, RunningAvgSamplesPerSec=6.3381116004514855, CurrSamplesPerSec=5.700562098019049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:58:01,489] [INFO] [timer.py:197:stop] 0/7872, RunningAvgSamplesPerSec=6.338111905656464, CurrSamplesPerSec=5.707745795611867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:58:12,762] [INFO] [timer.py:197:stop] 0/7874, RunningAvgSamplesPerSec=6.338112775738269, CurrSamplesPerSec=5.715166393060473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:58:24,035] [INFO] [timer.py:197:stop] 0/7876, RunningAvgSamplesPerSec=6.338115497792184, CurrSamplesPerSec=5.718213901715182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:58:35,415] [INFO] [timer.py:197:stop] 0/7878, RunningAvgSamplesPerSec=6.338118462172044, CurrSamplesPerSec=5.709587722247295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:58:46,800] [INFO] [logging.py:68:log_dist] [Rank 0] step=3940, skipped=5, lr=[2.3688888888888893e-06], mom=[[0.9, 0.999]] +[2022-12-17 09:58:46,802] [INFO] [timer.py:197:stop] 0/7880, RunningAvgSamplesPerSec=6.3381177443984775, CurrSamplesPerSec=5.683592897785402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:58:58,116] [INFO] [timer.py:197:stop] 0/7882, RunningAvgSamplesPerSec=6.338118981307474, CurrSamplesPerSec=5.693691116312811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:59:09,381] [INFO] [timer.py:197:stop] 0/7884, RunningAvgSamplesPerSec=6.338126813691846, CurrSamplesPerSec=5.74215327376515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:59:20,662] [INFO] [timer.py:197:stop] 0/7886, RunningAvgSamplesPerSec=6.338132665058401, CurrSamplesPerSec=5.708553464983033, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:59:31,934] [INFO] [timer.py:197:stop] 0/7888, RunningAvgSamplesPerSec=6.3381354312682605, CurrSamplesPerSec=5.701538481405924, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:59:43,231] [INFO] [timer.py:197:stop] 0/7890, RunningAvgSamplesPerSec=6.33813710920125, CurrSamplesPerSec=5.703827706300002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 09:59:54,487] [INFO] [timer.py:197:stop] 0/7892, RunningAvgSamplesPerSec=6.338145160230165, CurrSamplesPerSec=5.736119995746772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:00:05,845] [INFO] [timer.py:197:stop] 0/7894, RunningAvgSamplesPerSec=6.338134902893071, CurrSamplesPerSec=5.653856153556985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:00:17,120] [INFO] [timer.py:197:stop] 0/7896, RunningAvgSamplesPerSec=6.33814083501362, CurrSamplesPerSec=5.713846717230511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:00:28,404] [INFO] [timer.py:197:stop] 0/7898, RunningAvgSamplesPerSec=6.338142276531903, CurrSamplesPerSec=5.710470743574421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:00:39,716] [INFO] [logging.py:68:log_dist] [Rank 0] step=3950, skipped=5, lr=[2.346666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:00:39,717] [INFO] [timer.py:197:stop] 0/7900, RunningAvgSamplesPerSec=6.338144709674223, CurrSamplesPerSec=5.706045509182841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.346666666666667e-06, 'epoch': 16.74} +[2022-12-17 10:00:51,009] [INFO] [timer.py:197:stop] 0/7902, RunningAvgSamplesPerSec=6.338149983020205, CurrSamplesPerSec=5.717792715727425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:01:02,335] [INFO] [timer.py:197:stop] 0/7904, RunningAvgSamplesPerSec=6.338149141416174, CurrSamplesPerSec=5.701027485092423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:01:13,708] [INFO] [timer.py:197:stop] 0/7906, RunningAvgSamplesPerSec=6.33814149997887, CurrSamplesPerSec=5.634362080092918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:01:25,007] [INFO] [timer.py:197:stop] 0/7908, RunningAvgSamplesPerSec=6.338143007536694, CurrSamplesPerSec=5.710456408998133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:01:36,262] [INFO] [timer.py:197:stop] 0/7910, RunningAvgSamplesPerSec=6.338148563510418, CurrSamplesPerSec=5.737118653290545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:01:47,491] [INFO] [timer.py:197:stop] 0/7912, RunningAvgSamplesPerSec=6.338160609062336, CurrSamplesPerSec=5.749935600230909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:01:58,735] [INFO] [timer.py:197:stop] 0/7914, RunningAvgSamplesPerSec=6.338165702227535, CurrSamplesPerSec=5.708400750125987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:02:09,952] [INFO] [timer.py:197:stop] 0/7916, RunningAvgSamplesPerSec=6.338175063089888, CurrSamplesPerSec=5.747496251131897, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:02:21,213] [INFO] [timer.py:197:stop] 0/7918, RunningAvgSamplesPerSec=6.3381825449010805, CurrSamplesPerSec=5.728961577617624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:02:32,519] [INFO] [logging.py:68:log_dist] [Rank 0] step=3960, skipped=5, lr=[2.3244444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:02:32,521] [INFO] [timer.py:197:stop] 0/7920, RunningAvgSamplesPerSec=6.338182967900777, CurrSamplesPerSec=5.685274770482056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:02:43,763] [INFO] [timer.py:197:stop] 0/7922, RunningAvgSamplesPerSec=6.3381882149312325, CurrSamplesPerSec=5.7287571532247386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:02:55,040] [INFO] [timer.py:197:stop] 0/7924, RunningAvgSamplesPerSec=6.338192504869388, CurrSamplesPerSec=5.718937541301829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:03:06,297] [INFO] [timer.py:197:stop] 0/7926, RunningAvgSamplesPerSec=6.338198398399984, CurrSamplesPerSec=5.70923726157512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:03:17,576] [INFO] [timer.py:197:stop] 0/7928, RunningAvgSamplesPerSec=6.338203036589526, CurrSamplesPerSec=5.726441036860908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:03:28,838] [INFO] [timer.py:197:stop] 0/7930, RunningAvgSamplesPerSec=6.33820765643813, CurrSamplesPerSec=5.714428379740114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:03:40,116] [INFO] [timer.py:197:stop] 0/7932, RunningAvgSamplesPerSec=6.338211739957927, CurrSamplesPerSec=5.719892928542276, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:03:51,385] [INFO] [timer.py:197:stop] 0/7934, RunningAvgSamplesPerSec=6.338217423452864, CurrSamplesPerSec=5.711872235060416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:04:02,675] [INFO] [timer.py:197:stop] 0/7936, RunningAvgSamplesPerSec=6.338219974967238, CurrSamplesPerSec=5.700439589394914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:04:13,955] [INFO] [timer.py:197:stop] 0/7938, RunningAvgSamplesPerSec=6.338221058392447, CurrSamplesPerSec=5.6929115519410205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:04:25,215] [INFO] [logging.py:68:log_dist] [Rank 0] step=3970, skipped=5, lr=[2.302222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:04:25,217] [INFO] [timer.py:197:stop] 0/7940, RunningAvgSamplesPerSec=6.33822818491925, CurrSamplesPerSec=5.719371325538942, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:04:36,505] [INFO] [timer.py:197:stop] 0/7942, RunningAvgSamplesPerSec=6.3382306066497565, CurrSamplesPerSec=5.705062974173377, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:04:47,796] [INFO] [timer.py:197:stop] 0/7944, RunningAvgSamplesPerSec=6.338232647273031, CurrSamplesPerSec=5.712902345683689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:04:59,081] [INFO] [timer.py:197:stop] 0/7946, RunningAvgSamplesPerSec=6.338235937628303, CurrSamplesPerSec=5.716276813708189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:05:10,371] [INFO] [timer.py:197:stop] 0/7948, RunningAvgSamplesPerSec=6.338235903439613, CurrSamplesPerSec=5.702641673870506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:05:21,748] [INFO] [timer.py:197:stop] 0/7950, RunningAvgSamplesPerSec=6.338230165437593, CurrSamplesPerSec=5.684356430343305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.2911111111111114e-06, 'epoch': 16.84} +[2022-12-17 10:05:33,034] [INFO] [timer.py:197:stop] 0/7952, RunningAvgSamplesPerSec=6.3382333553649195, CurrSamplesPerSec=5.722647789710324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:05:44,320] [INFO] [timer.py:197:stop] 0/7954, RunningAvgSamplesPerSec=6.338236371939146, CurrSamplesPerSec=5.698118247592226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:05:55,639] [INFO] [timer.py:197:stop] 0/7956, RunningAvgSamplesPerSec=6.338234088343676, CurrSamplesPerSec=5.708029801166079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:06:06,899] [INFO] [timer.py:197:stop] 0/7958, RunningAvgSamplesPerSec=6.338239428620746, CurrSamplesPerSec=5.725561130059419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:06:18,199] [INFO] [logging.py:68:log_dist] [Rank 0] step=3980, skipped=5, lr=[2.28e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:06:18,200] [INFO] [timer.py:197:stop] 0/7960, RunningAvgSamplesPerSec=6.338242054976651, CurrSamplesPerSec=5.724104821554558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:06:29,479] [INFO] [timer.py:197:stop] 0/7962, RunningAvgSamplesPerSec=6.338247887723113, CurrSamplesPerSec=5.722088847612678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:06:40,729] [INFO] [timer.py:197:stop] 0/7964, RunningAvgSamplesPerSec=6.338257991239867, CurrSamplesPerSec=5.744080655070806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:06:51,967] [INFO] [timer.py:197:stop] 0/7966, RunningAvgSamplesPerSec=6.338269079031337, CurrSamplesPerSec=5.737984699474484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:07:03,226] [INFO] [timer.py:197:stop] 0/7968, RunningAvgSamplesPerSec=6.338276621632127, CurrSamplesPerSec=5.748057952642601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:07:14,518] [INFO] [timer.py:197:stop] 0/7970, RunningAvgSamplesPerSec=6.338279152405919, CurrSamplesPerSec=5.720762803449684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:07:25,805] [INFO] [timer.py:197:stop] 0/7972, RunningAvgSamplesPerSec=6.33828324681685, CurrSamplesPerSec=5.7059846214691925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:07:37,059] [INFO] [timer.py:197:stop] 0/7974, RunningAvgSamplesPerSec=6.338292838997039, CurrSamplesPerSec=5.754532575012987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:07:48,363] [INFO] [timer.py:197:stop] 0/7976, RunningAvgSamplesPerSec=6.338298034828886, CurrSamplesPerSec=5.725873536852861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:07:59,624] [INFO] [timer.py:197:stop] 0/7978, RunningAvgSamplesPerSec=6.338305476156251, CurrSamplesPerSec=5.74477913706752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:08:10,871] [INFO] [logging.py:68:log_dist] [Rank 0] step=3990, skipped=5, lr=[2.257777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:08:10,873] [INFO] [timer.py:197:stop] 0/7980, RunningAvgSamplesPerSec=6.338315193684523, CurrSamplesPerSec=5.74299135241991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:08:22,111] [INFO] [timer.py:197:stop] 0/7982, RunningAvgSamplesPerSec=6.338326127329389, CurrSamplesPerSec=5.745274397571756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:08:33,411] [INFO] [timer.py:197:stop] 0/7984, RunningAvgSamplesPerSec=6.3383327882824245, CurrSamplesPerSec=5.714127924469036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:08:44,654] [INFO] [timer.py:197:stop] 0/7986, RunningAvgSamplesPerSec=6.338340140310366, CurrSamplesPerSec=5.730535351302664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:08:55,959] [INFO] [timer.py:197:stop] 0/7988, RunningAvgSamplesPerSec=6.33834081377446, CurrSamplesPerSec=5.7012589962436975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:09:07,169] [INFO] [timer.py:197:stop] 0/7990, RunningAvgSamplesPerSec=6.338350998917928, CurrSamplesPerSec=5.740558872668938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:09:18,394] [INFO] [timer.py:197:stop] 0/7992, RunningAvgSamplesPerSec=6.338359022801522, CurrSamplesPerSec=5.732135204831851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:09:29,644] [INFO] [timer.py:197:stop] 0/7994, RunningAvgSamplesPerSec=6.338368416212484, CurrSamplesPerSec=5.744597923555065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:09:40,900] [INFO] [timer.py:197:stop] 0/7996, RunningAvgSamplesPerSec=6.33837681122492, CurrSamplesPerSec=5.742089402106264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:09:52,156] [INFO] [timer.py:197:stop] 0/7998, RunningAvgSamplesPerSec=6.33838585503386, CurrSamplesPerSec=5.741089504595729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 10:10:03,430] [INFO] [logging.py:68:log_dist] [Rank 0] step=4000, skipped=5, lr=[2.235555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 10:10:03,432] [INFO] [timer.py:197:stop] 0/8000, RunningAvgSamplesPerSec=6.338391427625841, CurrSamplesPerSec=5.722572395688769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0004, 'learning_rate': 2.235555555555556e-06, 'epoch': 16.95} +{'eval_loss': 0.20751953125, 'eval_wer': 9.117695806707058, 'eval_runtime': 2104.1262, 'eval_samples_per_second': 3.666, 'eval_steps_per_second': 0.459, 'epoch': 16.95} +[2022-12-17 10:45:11,140] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step4000 is begin to save! +[2022-12-17 10:45:11,149] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-4000/global_step4000/mp_rank_00_model_states.pt +[2022-12-17 10:45:11,149] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-4000/global_step4000/mp_rank_00_model_states.pt... +[2022-12-17 10:45:14,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-4000/global_step4000/mp_rank_00_model_states.pt. +[2022-12-17 10:45:14,683] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-17 10:45:29,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-17 10:45:29,453] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-17 10:45:29,453] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now!