diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -2737,3 +2737,1152 @@ Time to load utils op: 0.0003948211669921875 seconds [2022-12-17 03:13:31,522] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-17 03:13:31,523] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-17 03:13:31,523] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +[2022-12-17 03:15:34,464] [INFO] [timer.py:197:stop] 0/4002, RunningAvgSamplesPerSec=6.32971650220364, CurrSamplesPerSec=5.432966773902989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:15:46,027] [INFO] [timer.py:197:stop] 0/4004, RunningAvgSamplesPerSec=6.329736699376116, CurrSamplesPerSec=5.732430946357427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:15:57,711] [INFO] [timer.py:197:stop] 0/4006, RunningAvgSamplesPerSec=6.329714692420776, CurrSamplesPerSec=5.7052121153177975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:16:09,000] [INFO] [timer.py:197:stop] 0/4008, RunningAvgSamplesPerSec=6.329729049348178, CurrSamplesPerSec=5.721881254416873, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:16:20,309] [INFO] [timer.py:197:stop] 0/4010, RunningAvgSamplesPerSec=6.32973210769902, CurrSamplesPerSec=5.6921966559580985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:16:31,950] [INFO] [timer.py:197:stop] 0/4012, RunningAvgSamplesPerSec=6.329631531896987, CurrSamplesPerSec=5.6903598999459915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:16:43,277] [INFO] [timer.py:197:stop] 0/4014, RunningAvgSamplesPerSec=6.3296284082486265, CurrSamplesPerSec=5.682677266268421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:16:54,617] [INFO] [timer.py:197:stop] 0/4016, RunningAvgSamplesPerSec=6.329620990535829, CurrSamplesPerSec=5.6637007057802835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:17:06,320] [INFO] [timer.py:197:stop] 0/4018, RunningAvgSamplesPerSec=6.32961940678384, CurrSamplesPerSec=5.690537948945658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:17:17,689] [INFO] [logging.py:68:log_dist] [Rank 0] step=2010, skipped=5, lr=[6.657777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:17:17,691] [INFO] [timer.py:197:stop] 0/4020, RunningAvgSamplesPerSec=6.329603248560779, CurrSamplesPerSec=5.636197649705268, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:17:28,951] [INFO] [timer.py:197:stop] 0/4022, RunningAvgSamplesPerSec=6.329621839411704, CurrSamplesPerSec=5.729020022201075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:17:40,565] [INFO] [timer.py:197:stop] 0/4024, RunningAvgSamplesPerSec=6.329636464480563, CurrSamplesPerSec=5.72667583837786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:17:51,974] [INFO] [timer.py:197:stop] 0/4026, RunningAvgSamplesPerSec=6.329651877567379, CurrSamplesPerSec=5.730219255254423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:18:03,329] [INFO] [timer.py:197:stop] 0/4028, RunningAvgSamplesPerSec=6.3296407044135305, CurrSamplesPerSec=5.6457342331832026, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:18:14,820] [INFO] [timer.py:197:stop] 0/4030, RunningAvgSamplesPerSec=6.329647226145717, CurrSamplesPerSec=5.706841778088452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:18:26,288] [INFO] [timer.py:197:stop] 0/4032, RunningAvgSamplesPerSec=6.329657786556081, CurrSamplesPerSec=5.715247919575367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:18:37,684] [INFO] [timer.py:197:stop] 0/4034, RunningAvgSamplesPerSec=6.329633943231989, CurrSamplesPerSec=5.610441031101313, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:18:49,059] [INFO] [timer.py:197:stop] 0/4036, RunningAvgSamplesPerSec=6.329643210614384, CurrSamplesPerSec=5.71373385290509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:19:00,385] [INFO] [timer.py:197:stop] 0/4038, RunningAvgSamplesPerSec=6.329647008206662, CurrSamplesPerSec=5.697957140380308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:19:11,655] [INFO] [logging.py:68:log_dist] [Rank 0] step=2020, skipped=5, lr=[6.6355555555555565e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:19:11,656] [INFO] [timer.py:197:stop] 0/4040, RunningAvgSamplesPerSec=6.329656548870118, CurrSamplesPerSec=5.712982834924393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:19:22,964] [INFO] [timer.py:197:stop] 0/4042, RunningAvgSamplesPerSec=6.329664344944999, CurrSamplesPerSec=5.707049494758968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:19:34,251] [INFO] [timer.py:197:stop] 0/4044, RunningAvgSamplesPerSec=6.329677524006703, CurrSamplesPerSec=5.702061923775371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:19:45,586] [INFO] [timer.py:197:stop] 0/4046, RunningAvgSamplesPerSec=6.329673899017344, CurrSamplesPerSec=5.676386285852038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:19:56,869] [INFO] [timer.py:197:stop] 0/4048, RunningAvgSamplesPerSec=6.329685060089817, CurrSamplesPerSec=5.705445179125403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:20:08,153] [INFO] [timer.py:197:stop] 0/4050, RunningAvgSamplesPerSec=6.329690147179106, CurrSamplesPerSec=5.718421959793726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0044, 'learning_rate': 6.6244444444444445e-06, 'epoch': 8.58} +[2022-12-17 03:20:19,482] [INFO] [timer.py:197:stop] 0/4052, RunningAvgSamplesPerSec=6.329687546775836, CurrSamplesPerSec=5.677962528789538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:20:30,761] [INFO] [timer.py:197:stop] 0/4054, RunningAvgSamplesPerSec=6.329704134400702, CurrSamplesPerSec=5.731804735092358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:20:42,053] [INFO] [timer.py:197:stop] 0/4056, RunningAvgSamplesPerSec=6.3297167432347985, CurrSamplesPerSec=5.729271665429117, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:20:53,666] [INFO] [timer.py:197:stop] 0/4058, RunningAvgSamplesPerSec=6.329700568900112, CurrSamplesPerSec=5.670981747554733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:21:04,942] [INFO] [logging.py:68:log_dist] [Rank 0] step=2030, skipped=5, lr=[6.613333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:21:04,944] [INFO] [timer.py:197:stop] 0/4060, RunningAvgSamplesPerSec=6.329714148054933, CurrSamplesPerSec=5.7095685344444, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:21:16,406] [INFO] [timer.py:197:stop] 0/4062, RunningAvgSamplesPerSec=6.32971542335254, CurrSamplesPerSec=5.707395803713119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:21:27,826] [INFO] [timer.py:197:stop] 0/4064, RunningAvgSamplesPerSec=6.329716546008745, CurrSamplesPerSec=5.7032450492428435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:21:39,082] [INFO] [timer.py:197:stop] 0/4066, RunningAvgSamplesPerSec=6.329727383544931, CurrSamplesPerSec=5.708138313313967, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:21:50,416] [INFO] [timer.py:197:stop] 0/4068, RunningAvgSamplesPerSec=6.329723384009874, CurrSamplesPerSec=5.685249002830177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:22:01,855] [INFO] [timer.py:197:stop] 0/4070, RunningAvgSamplesPerSec=6.329735288298022, CurrSamplesPerSec=5.720577250197871, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:22:13,353] [INFO] [timer.py:197:stop] 0/4072, RunningAvgSamplesPerSec=6.329753146864072, CurrSamplesPerSec=5.725375509643616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:22:24,779] [INFO] [timer.py:197:stop] 0/4074, RunningAvgSamplesPerSec=6.329715954461023, CurrSamplesPerSec=5.568953772876529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:22:36,381] [INFO] [timer.py:197:stop] 0/4076, RunningAvgSamplesPerSec=6.329719132052436, CurrSamplesPerSec=5.680185980782685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:22:47,778] [INFO] [timer.py:197:stop] 0/4078, RunningAvgSamplesPerSec=6.3297134755114035, CurrSamplesPerSec=5.69351552665189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:22:59,261] [INFO] [logging.py:68:log_dist] [Rank 0] step=2040, skipped=5, lr=[6.591111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:22:59,263] [INFO] [timer.py:197:stop] 0/4080, RunningAvgSamplesPerSec=6.329662099764259, CurrSamplesPerSec=5.5199089195498106, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:23:10,789] [INFO] [timer.py:197:stop] 0/4082, RunningAvgSamplesPerSec=6.32966716446522, CurrSamplesPerSec=5.701408180754435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:23:22,027] [INFO] [timer.py:197:stop] 0/4084, RunningAvgSamplesPerSec=6.329677835786168, CurrSamplesPerSec=5.70064635601823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:23:33,530] [INFO] [timer.py:197:stop] 0/4086, RunningAvgSamplesPerSec=6.329621485972424, CurrSamplesPerSec=5.497445663780493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:23:44,854] [INFO] [timer.py:197:stop] 0/4088, RunningAvgSamplesPerSec=6.3296199090930605, CurrSamplesPerSec=5.686864869676773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:23:56,148] [INFO] [timer.py:197:stop] 0/4090, RunningAvgSamplesPerSec=6.329627497694356, CurrSamplesPerSec=5.707074489723329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:24:07,626] [INFO] [timer.py:197:stop] 0/4092, RunningAvgSamplesPerSec=6.329579498207154, CurrSamplesPerSec=5.5263138434487615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:24:18,910] [INFO] [timer.py:197:stop] 0/4094, RunningAvgSamplesPerSec=6.3295852834765025, CurrSamplesPerSec=5.718602012903586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:24:30,243] [INFO] [timer.py:197:stop] 0/4096, RunningAvgSamplesPerSec=6.329582388154523, CurrSamplesPerSec=5.686816197184262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:24:41,558] [INFO] [timer.py:197:stop] 0/4098, RunningAvgSamplesPerSec=6.329574192901616, CurrSamplesPerSec=5.650972720985214, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:24:52,865] [INFO] [logging.py:68:log_dist] [Rank 0] step=2050, skipped=5, lr=[6.568888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:24:52,867] [INFO] [timer.py:197:stop] 0/4100, RunningAvgSamplesPerSec=6.329577261261745, CurrSamplesPerSec=5.709207876322052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0038, 'learning_rate': 6.568888888888889e-06, 'epoch': 8.69} +[2022-12-17 03:25:04,148] [INFO] [timer.py:197:stop] 0/4102, RunningAvgSamplesPerSec=6.329584710681415, CurrSamplesPerSec=5.7096530588978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:25:15,433] [INFO] [timer.py:197:stop] 0/4104, RunningAvgSamplesPerSec=6.329591737707764, CurrSamplesPerSec=5.7040087808033695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:25:26,720] [INFO] [timer.py:197:stop] 0/4106, RunningAvgSamplesPerSec=6.3296020891744265, CurrSamplesPerSec=5.718893922820299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:25:38,007] [INFO] [timer.py:197:stop] 0/4108, RunningAvgSamplesPerSec=6.329611851038329, CurrSamplesPerSec=5.717244706560494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:25:49,318] [INFO] [timer.py:197:stop] 0/4110, RunningAvgSamplesPerSec=6.32961398025573, CurrSamplesPerSec=5.707869589430014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:26:00,697] [INFO] [timer.py:197:stop] 0/4112, RunningAvgSamplesPerSec=6.329599969613452, CurrSamplesPerSec=5.651735367307539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:26:11,995] [INFO] [timer.py:197:stop] 0/4114, RunningAvgSamplesPerSec=6.329607728066423, CurrSamplesPerSec=5.702019531298125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:26:23,266] [INFO] [timer.py:197:stop] 0/4116, RunningAvgSamplesPerSec=6.329624606775776, CurrSamplesPerSec=5.729385878152315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:26:34,582] [INFO] [timer.py:197:stop] 0/4118, RunningAvgSamplesPerSec=6.3296290795189885, CurrSamplesPerSec=5.688858265982743, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:26:45,872] [INFO] [logging.py:68:log_dist] [Rank 0] step=2060, skipped=5, lr=[6.546666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:26:45,874] [INFO] [timer.py:197:stop] 0/4120, RunningAvgSamplesPerSec=6.3296391052996706, CurrSamplesPerSec=5.710607776037591, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:26:57,215] [INFO] [timer.py:197:stop] 0/4122, RunningAvgSamplesPerSec=6.3296344831495865, CurrSamplesPerSec=5.668019787475643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:27:08,515] [INFO] [timer.py:197:stop] 0/4124, RunningAvgSamplesPerSec=6.3296411666454695, CurrSamplesPerSec=5.702394787187077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:27:19,807] [INFO] [timer.py:197:stop] 0/4126, RunningAvgSamplesPerSec=6.3296543447409706, CurrSamplesPerSec=5.716563129758426, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:27:31,092] [INFO] [timer.py:197:stop] 0/4128, RunningAvgSamplesPerSec=6.329665769004512, CurrSamplesPerSec=5.713112449253873, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:27:42,402] [INFO] [timer.py:197:stop] 0/4130, RunningAvgSamplesPerSec=6.329669537874054, CurrSamplesPerSec=5.685855206581097, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:27:53,659] [INFO] [timer.py:197:stop] 0/4132, RunningAvgSamplesPerSec=6.329684738201763, CurrSamplesPerSec=5.730757275477921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:28:05,083] [INFO] [timer.py:197:stop] 0/4134, RunningAvgSamplesPerSec=6.329653695177852, CurrSamplesPerSec=5.704550376108452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:28:16,391] [INFO] [timer.py:197:stop] 0/4136, RunningAvgSamplesPerSec=6.329658226333892, CurrSamplesPerSec=5.669268567918603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:28:27,660] [INFO] [timer.py:197:stop] 0/4138, RunningAvgSamplesPerSec=6.329669431371385, CurrSamplesPerSec=5.7140525115073775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:28:39,033] [INFO] [logging.py:68:log_dist] [Rank 0] step=2070, skipped=5, lr=[6.524444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:28:39,035] [INFO] [timer.py:197:stop] 0/4140, RunningAvgSamplesPerSec=6.329653569925222, CurrSamplesPerSec=5.702104074634847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:28:50,314] [INFO] [timer.py:197:stop] 0/4142, RunningAvgSamplesPerSec=6.329667150732836, CurrSamplesPerSec=5.711580798448425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:29:01,628] [INFO] [timer.py:197:stop] 0/4144, RunningAvgSamplesPerSec=6.329672264319184, CurrSamplesPerSec=5.7046015347856525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:29:13,190] [INFO] [timer.py:197:stop] 0/4146, RunningAvgSamplesPerSec=6.329590255909673, CurrSamplesPerSec=5.693526153503661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:29:24,479] [INFO] [timer.py:197:stop] 0/4148, RunningAvgSamplesPerSec=6.329600253682247, CurrSamplesPerSec=5.712139877360979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:29:35,740] [INFO] [timer.py:197:stop] 0/4150, RunningAvgSamplesPerSec=6.329614214241194, CurrSamplesPerSec=5.721944677297023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0043, 'learning_rate': 6.513333333333333e-06, 'epoch': 8.79} +[2022-12-17 03:29:47,451] [INFO] [timer.py:197:stop] 0/4152, RunningAvgSamplesPerSec=6.329625743832189, CurrSamplesPerSec=5.713240853488009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:29:58,927] [INFO] [timer.py:197:stop] 0/4154, RunningAvgSamplesPerSec=6.32962401605577, CurrSamplesPerSec=5.681033239220668, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:30:10,298] [INFO] [timer.py:197:stop] 0/4156, RunningAvgSamplesPerSec=6.329609371145642, CurrSamplesPerSec=5.640112222567924, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:30:21,742] [INFO] [timer.py:197:stop] 0/4158, RunningAvgSamplesPerSec=6.329619336010558, CurrSamplesPerSec=5.725745541206016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:30:33,113] [INFO] [logging.py:68:log_dist] [Rank 0] step=2080, skipped=5, lr=[6.502222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:30:33,114] [INFO] [timer.py:197:stop] 0/4160, RunningAvgSamplesPerSec=6.3296304931523135, CurrSamplesPerSec=5.719744481367389, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:30:44,377] [INFO] [timer.py:197:stop] 0/4162, RunningAvgSamplesPerSec=6.329639665828144, CurrSamplesPerSec=5.7231092248855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:30:55,952] [INFO] [timer.py:197:stop] 0/4164, RunningAvgSamplesPerSec=6.329642890850235, CurrSamplesPerSec=5.7104019868553015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:31:07,478] [INFO] [timer.py:197:stop] 0/4166, RunningAvgSamplesPerSec=6.329641465120764, CurrSamplesPerSec=5.693680488845232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:31:18,787] [INFO] [timer.py:197:stop] 0/4168, RunningAvgSamplesPerSec=6.329646013398303, CurrSamplesPerSec=5.698077365169859, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:31:30,204] [INFO] [timer.py:197:stop] 0/4170, RunningAvgSamplesPerSec=6.32965932265026, CurrSamplesPerSec=5.731016412667152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:31:41,572] [INFO] [timer.py:197:stop] 0/4172, RunningAvgSamplesPerSec=6.329659992211202, CurrSamplesPerSec=5.703579747419184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:31:53,022] [INFO] [timer.py:197:stop] 0/4174, RunningAvgSamplesPerSec=6.329620320914269, CurrSamplesPerSec=5.552174278511241, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:32:04,605] [INFO] [timer.py:197:stop] 0/4176, RunningAvgSamplesPerSec=6.329618251779879, CurrSamplesPerSec=5.664145751377774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:32:16,047] [INFO] [timer.py:197:stop] 0/4178, RunningAvgSamplesPerSec=6.329620119317804, CurrSamplesPerSec=5.691267149303224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:32:27,557] [INFO] [logging.py:68:log_dist] [Rank 0] step=2090, skipped=5, lr=[6.480000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:32:27,559] [INFO] [timer.py:197:stop] 0/4180, RunningAvgSamplesPerSec=6.329559581631733, CurrSamplesPerSec=5.49025277515664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:32:38,847] [INFO] [timer.py:197:stop] 0/4182, RunningAvgSamplesPerSec=6.329566571122446, CurrSamplesPerSec=5.7119519660201625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:32:50,221] [INFO] [timer.py:197:stop] 0/4184, RunningAvgSamplesPerSec=6.329551512684384, CurrSamplesPerSec=5.658288559220713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:33:01,779] [INFO] [timer.py:197:stop] 0/4186, RunningAvgSamplesPerSec=6.329482129285263, CurrSamplesPerSec=5.455197789627966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:33:13,234] [INFO] [timer.py:197:stop] 0/4188, RunningAvgSamplesPerSec=6.329489056102356, CurrSamplesPerSec=5.689115074579025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:33:24,534] [INFO] [timer.py:197:stop] 0/4190, RunningAvgSamplesPerSec=6.3294930470222965, CurrSamplesPerSec=5.715814775678154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:33:36,200] [INFO] [timer.py:197:stop] 0/4192, RunningAvgSamplesPerSec=6.329385693595587, CurrSamplesPerSec=5.342715526710825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:33:47,585] [INFO] [timer.py:197:stop] 0/4194, RunningAvgSamplesPerSec=6.329387920504537, CurrSamplesPerSec=5.708342239921322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:33:58,934] [INFO] [timer.py:197:stop] 0/4196, RunningAvgSamplesPerSec=6.329393759067435, CurrSamplesPerSec=5.698761557733716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:34:10,299] [INFO] [timer.py:197:stop] 0/4198, RunningAvgSamplesPerSec=6.329377635096463, CurrSamplesPerSec=5.627035752573431, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:34:21,607] [INFO] [logging.py:68:log_dist] [Rank 0] step=2100, skipped=5, lr=[6.457777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:34:21,609] [INFO] [timer.py:197:stop] 0/4200, RunningAvgSamplesPerSec=6.329387313311945, CurrSamplesPerSec=5.706954612955244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0033, 'learning_rate': 6.457777777777778e-06, 'epoch': 8.9} +[2022-12-17 03:34:32,898] [INFO] [timer.py:197:stop] 0/4202, RunningAvgSamplesPerSec=6.3293938964875185, CurrSamplesPerSec=5.699129125038513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:34:44,297] [INFO] [timer.py:197:stop] 0/4204, RunningAvgSamplesPerSec=6.329363594365425, CurrSamplesPerSec=5.581797360025495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:34:55,847] [INFO] [timer.py:197:stop] 0/4206, RunningAvgSamplesPerSec=6.329371810931073, CurrSamplesPerSec=5.705324158184767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:35:07,174] [INFO] [timer.py:197:stop] 0/4208, RunningAvgSamplesPerSec=6.3293724138440295, CurrSamplesPerSec=5.683446328781715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:35:18,805] [INFO] [timer.py:197:stop] 0/4210, RunningAvgSamplesPerSec=6.329274039267405, CurrSamplesPerSec=5.35519432319045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:35:30,104] [INFO] [timer.py:197:stop] 0/4212, RunningAvgSamplesPerSec=6.32927545611261, CurrSamplesPerSec=5.688271912984834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:35:41,411] [INFO] [timer.py:197:stop] 0/4214, RunningAvgSamplesPerSec=6.329279504683213, CurrSamplesPerSec=5.702676806686563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:35:52,661] [INFO] [timer.py:197:stop] 0/4216, RunningAvgSamplesPerSec=6.3292975459712455, CurrSamplesPerSec=5.735788331041529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:36:03,980] [INFO] [timer.py:197:stop] 0/4218, RunningAvgSamplesPerSec=6.3292954506333325, CurrSamplesPerSec=5.720244698668586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:36:15,270] [INFO] [logging.py:68:log_dist] [Rank 0] step=2110, skipped=5, lr=[6.435555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:36:15,271] [INFO] [timer.py:197:stop] 0/4220, RunningAvgSamplesPerSec=6.3293015183733585, CurrSamplesPerSec=5.70264627745624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:36:26,550] [INFO] [timer.py:197:stop] 0/4222, RunningAvgSamplesPerSec=6.329314799213386, CurrSamplesPerSec=5.726156173463574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:36:37,964] [INFO] [timer.py:197:stop] 0/4224, RunningAvgSamplesPerSec=6.329297925836548, CurrSamplesPerSec=5.658801704212691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:36:49,489] [INFO] [timer.py:197:stop] 0/4226, RunningAvgSamplesPerSec=6.329300030042395, CurrSamplesPerSec=5.680489849179637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:37:00,832] [INFO] [timer.py:197:stop] 0/4228, RunningAvgSamplesPerSec=6.329290592139952, CurrSamplesPerSec=5.6715307493274505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:37:12,211] [INFO] [timer.py:197:stop] 0/4230, RunningAvgSamplesPerSec=6.329296799735275, CurrSamplesPerSec=5.710345622158048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:37:23,534] [INFO] [timer.py:197:stop] 0/4232, RunningAvgSamplesPerSec=6.329293914072135, CurrSamplesPerSec=5.700618996107484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:37:35,089] [INFO] [timer.py:197:stop] 0/4234, RunningAvgSamplesPerSec=6.329223078174755, CurrSamplesPerSec=5.474385340765658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:37:46,417] [INFO] [timer.py:197:stop] 0/4236, RunningAvgSamplesPerSec=6.329221569757894, CurrSamplesPerSec=5.6885783348551735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:37:57,764] [INFO] [timer.py:197:stop] 0/4238, RunningAvgSamplesPerSec=6.3292032239593174, CurrSamplesPerSec=5.621259492527623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:38:09,097] [INFO] [logging.py:68:log_dist] [Rank 0] step=2120, skipped=5, lr=[6.4133333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:38:09,099] [INFO] [timer.py:197:stop] 0/4240, RunningAvgSamplesPerSec=6.3291967428627265, CurrSamplesPerSec=5.660347665602659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:38:20,450] [INFO] [timer.py:197:stop] 0/4242, RunningAvgSamplesPerSec=6.329187590411158, CurrSamplesPerSec=5.664508866697526, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:38:31,735] [INFO] [timer.py:197:stop] 0/4244, RunningAvgSamplesPerSec=6.329196166706487, CurrSamplesPerSec=5.720654542271054, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:38:43,043] [INFO] [timer.py:197:stop] 0/4246, RunningAvgSamplesPerSec=6.3291981518458655, CurrSamplesPerSec=5.692124476172127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:38:51,551] [INFO] [timer.py:197:stop] 0/4248, RunningAvgSamplesPerSec=6.329926955740864, CurrSamplesPerSec=10.191885733525591, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:39:02,808] [INFO] [timer.py:197:stop] 0/4250, RunningAvgSamplesPerSec=6.329945497555158, CurrSamplesPerSec=5.748404824921448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0034, 'learning_rate': 6.402222222222223e-06, 'epoch': 9.0} +[2022-12-17 03:39:14,100] [INFO] [timer.py:197:stop] 0/4252, RunningAvgSamplesPerSec=6.329951721289756, CurrSamplesPerSec=5.724151693184711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:39:25,414] [INFO] [timer.py:197:stop] 0/4254, RunningAvgSamplesPerSec=6.329950908156978, CurrSamplesPerSec=5.700597931576377, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:39:36,694] [INFO] [timer.py:197:stop] 0/4256, RunningAvgSamplesPerSec=6.329955788741976, CurrSamplesPerSec=5.695662231242793, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:39:47,972] [INFO] [timer.py:197:stop] 0/4258, RunningAvgSamplesPerSec=6.329966695173077, CurrSamplesPerSec=5.712562662301858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:39:59,270] [INFO] [logging.py:68:log_dist] [Rank 0] step=2130, skipped=5, lr=[6.391111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:39:59,272] [INFO] [timer.py:197:stop] 0/4260, RunningAvgSamplesPerSec=6.3299661333167165, CurrSamplesPerSec=5.71034149203002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:40:10,580] [INFO] [timer.py:197:stop] 0/4262, RunningAvgSamplesPerSec=6.329958580709758, CurrSamplesPerSec=5.673393738119202, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:40:21,866] [INFO] [timer.py:197:stop] 0/4264, RunningAvgSamplesPerSec=6.3299615564115275, CurrSamplesPerSec=5.704911658444872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:40:33,141] [INFO] [timer.py:197:stop] 0/4266, RunningAvgSamplesPerSec=6.329968199591096, CurrSamplesPerSec=5.7203561137522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:40:44,456] [INFO] [timer.py:197:stop] 0/4268, RunningAvgSamplesPerSec=6.329969998219967, CurrSamplesPerSec=5.713600561987379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:40:55,751] [INFO] [timer.py:197:stop] 0/4270, RunningAvgSamplesPerSec=6.329975276219748, CurrSamplesPerSec=5.6975735193423205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:41:07,069] [INFO] [timer.py:197:stop] 0/4272, RunningAvgSamplesPerSec=6.329973921749979, CurrSamplesPerSec=5.702552995722206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:41:18,377] [INFO] [timer.py:197:stop] 0/4274, RunningAvgSamplesPerSec=6.329976146136992, CurrSamplesPerSec=5.694227613441014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:41:29,677] [INFO] [timer.py:197:stop] 0/4276, RunningAvgSamplesPerSec=6.329976503267094, CurrSamplesPerSec=5.705686993857206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:41:40,961] [INFO] [timer.py:197:stop] 0/4278, RunningAvgSamplesPerSec=6.329981382624964, CurrSamplesPerSec=5.711891438350277, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:41:52,457] [INFO] [logging.py:68:log_dist] [Rank 0] step=2140, skipped=5, lr=[6.368888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:41:52,459] [INFO] [timer.py:197:stop] 0/4280, RunningAvgSamplesPerSec=6.329982693290168, CurrSamplesPerSec=5.707232958038073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:42:03,725] [INFO] [timer.py:197:stop] 0/4282, RunningAvgSamplesPerSec=6.329991865303273, CurrSamplesPerSec=5.713054571959741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:42:15,042] [INFO] [timer.py:197:stop] 0/4284, RunningAvgSamplesPerSec=6.329990865925492, CurrSamplesPerSec=5.676679663925739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:42:26,344] [INFO] [timer.py:197:stop] 0/4286, RunningAvgSamplesPerSec=6.3299970498352485, CurrSamplesPerSec=5.709284132894068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:42:37,618] [INFO] [timer.py:197:stop] 0/4288, RunningAvgSamplesPerSec=6.330009159304688, CurrSamplesPerSec=5.725098322614197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:42:48,894] [INFO] [timer.py:197:stop] 0/4290, RunningAvgSamplesPerSec=6.330021472725454, CurrSamplesPerSec=5.7156183469429696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:43:00,189] [INFO] [timer.py:197:stop] 0/4292, RunningAvgSamplesPerSec=6.330023543445085, CurrSamplesPerSec=5.6900711368439065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:43:11,467] [INFO] [timer.py:197:stop] 0/4294, RunningAvgSamplesPerSec=6.33003465710397, CurrSamplesPerSec=5.722863979052479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:43:22,753] [INFO] [timer.py:197:stop] 0/4296, RunningAvgSamplesPerSec=6.330044033654043, CurrSamplesPerSec=5.711008949653978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:43:34,052] [INFO] [timer.py:197:stop] 0/4298, RunningAvgSamplesPerSec=6.330051736381823, CurrSamplesPerSec=5.7098993599376096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:43:45,358] [INFO] [logging.py:68:log_dist] [Rank 0] step=2150, skipped=5, lr=[6.346666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:43:45,360] [INFO] [timer.py:197:stop] 0/4300, RunningAvgSamplesPerSec=6.330053968045217, CurrSamplesPerSec=5.711852059591176, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0027, 'learning_rate': 6.346666666666668e-06, 'epoch': 9.11} +[2022-12-17 03:43:56,648] [INFO] [timer.py:197:stop] 0/4302, RunningAvgSamplesPerSec=6.330057923831548, CurrSamplesPerSec=5.699157196643189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:44:07,884] [INFO] [timer.py:197:stop] 0/4304, RunningAvgSamplesPerSec=6.3300738850474225, CurrSamplesPerSec=5.737495354931139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:44:19,393] [INFO] [timer.py:197:stop] 0/4306, RunningAvgSamplesPerSec=6.330077660065506, CurrSamplesPerSec=5.6889624334353215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:44:30,687] [INFO] [timer.py:197:stop] 0/4308, RunningAvgSamplesPerSec=6.330085346146435, CurrSamplesPerSec=5.708143896824679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:44:41,955] [INFO] [timer.py:197:stop] 0/4310, RunningAvgSamplesPerSec=6.330095058863617, CurrSamplesPerSec=5.716104697029398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:44:53,244] [INFO] [timer.py:197:stop] 0/4312, RunningAvgSamplesPerSec=6.330103442896721, CurrSamplesPerSec=5.709839361572828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:45:04,538] [INFO] [timer.py:197:stop] 0/4314, RunningAvgSamplesPerSec=6.3301104318862835, CurrSamplesPerSec=5.700382210684036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:45:15,848] [INFO] [timer.py:197:stop] 0/4316, RunningAvgSamplesPerSec=6.33011144671632, CurrSamplesPerSec=5.70177754288311, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:45:27,176] [INFO] [timer.py:197:stop] 0/4318, RunningAvgSamplesPerSec=6.330108280879267, CurrSamplesPerSec=5.688100273302122, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:45:38,475] [INFO] [logging.py:68:log_dist] [Rank 0] step=2160, skipped=5, lr=[6.324444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:45:38,477] [INFO] [timer.py:197:stop] 0/4320, RunningAvgSamplesPerSec=6.330114475329232, CurrSamplesPerSec=5.704752106643522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:45:49,797] [INFO] [timer.py:197:stop] 0/4322, RunningAvgSamplesPerSec=6.330116375057936, CurrSamplesPerSec=5.718255560844172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:46:01,100] [INFO] [timer.py:197:stop] 0/4324, RunningAvgSamplesPerSec=6.330120682549647, CurrSamplesPerSec=5.712504066748646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:46:12,375] [INFO] [timer.py:197:stop] 0/4326, RunningAvgSamplesPerSec=6.330133929062189, CurrSamplesPerSec=5.72703455258396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:46:23,714] [INFO] [timer.py:197:stop] 0/4328, RunningAvgSamplesPerSec=6.330129803058916, CurrSamplesPerSec=5.6888122116816495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:46:34,999] [INFO] [timer.py:197:stop] 0/4330, RunningAvgSamplesPerSec=6.330134832486639, CurrSamplesPerSec=5.709437380764239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:46:46,276] [INFO] [timer.py:197:stop] 0/4332, RunningAvgSamplesPerSec=6.330142065442036, CurrSamplesPerSec=5.700766452184192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:46:57,581] [INFO] [timer.py:197:stop] 0/4334, RunningAvgSamplesPerSec=6.330142658914399, CurrSamplesPerSec=5.686657414993794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:47:08,853] [INFO] [timer.py:197:stop] 0/4336, RunningAvgSamplesPerSec=6.330150998651473, CurrSamplesPerSec=5.711459517079455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:47:20,162] [INFO] [timer.py:197:stop] 0/4338, RunningAvgSamplesPerSec=6.330149550688539, CurrSamplesPerSec=5.666942151202926, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:47:31,422] [INFO] [logging.py:68:log_dist] [Rank 0] step=2170, skipped=5, lr=[6.302222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:47:31,424] [INFO] [timer.py:197:stop] 0/4340, RunningAvgSamplesPerSec=6.330167309081633, CurrSamplesPerSec=5.715221149418475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:47:42,711] [INFO] [timer.py:197:stop] 0/4342, RunningAvgSamplesPerSec=6.33017199685608, CurrSamplesPerSec=5.686261100566463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:47:54,166] [INFO] [timer.py:197:stop] 0/4344, RunningAvgSamplesPerSec=6.330183846704809, CurrSamplesPerSec=5.696465033629745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:48:05,427] [INFO] [timer.py:197:stop] 0/4346, RunningAvgSamplesPerSec=6.330196429462367, CurrSamplesPerSec=5.7171146611417045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:48:16,706] [INFO] [timer.py:197:stop] 0/4348, RunningAvgSamplesPerSec=6.330206849105902, CurrSamplesPerSec=5.714444680646246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:48:28,017] [INFO] [timer.py:197:stop] 0/4350, RunningAvgSamplesPerSec=6.330208170318338, CurrSamplesPerSec=5.692302394200352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0032, 'learning_rate': 6.291111111111111e-06, 'epoch': 9.22} +[2022-12-17 03:48:39,257] [INFO] [timer.py:197:stop] 0/4352, RunningAvgSamplesPerSec=6.330230027558183, CurrSamplesPerSec=5.74510397307503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:48:50,553] [INFO] [timer.py:197:stop] 0/4354, RunningAvgSamplesPerSec=6.330236067545069, CurrSamplesPerSec=5.6960760535362125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:49:01,820] [INFO] [timer.py:197:stop] 0/4356, RunningAvgSamplesPerSec=6.330245950766376, CurrSamplesPerSec=5.709252318598857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:49:13,074] [INFO] [timer.py:197:stop] 0/4358, RunningAvgSamplesPerSec=6.3302603683327785, CurrSamplesPerSec=5.723507031478542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:49:24,355] [INFO] [logging.py:68:log_dist] [Rank 0] step=2180, skipped=5, lr=[6.280000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:49:24,357] [INFO] [timer.py:197:stop] 0/4360, RunningAvgSamplesPerSec=6.330268998724679, CurrSamplesPerSec=5.716786164512912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:49:35,681] [INFO] [timer.py:197:stop] 0/4362, RunningAvgSamplesPerSec=6.330269874076075, CurrSamplesPerSec=5.709341448091649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:49:46,987] [INFO] [timer.py:197:stop] 0/4364, RunningAvgSamplesPerSec=6.330273298176687, CurrSamplesPerSec=5.7026029071275754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:49:58,302] [INFO] [timer.py:197:stop] 0/4366, RunningAvgSamplesPerSec=6.330274186013026, CurrSamplesPerSec=5.711801013859715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:50:09,593] [INFO] [timer.py:197:stop] 0/4368, RunningAvgSamplesPerSec=6.330281236381866, CurrSamplesPerSec=5.714302111692187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:50:20,878] [INFO] [timer.py:197:stop] 0/4370, RunningAvgSamplesPerSec=6.330293064442466, CurrSamplesPerSec=5.723522652006832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:50:32,180] [INFO] [timer.py:197:stop] 0/4372, RunningAvgSamplesPerSec=6.330291145422422, CurrSamplesPerSec=5.684595016149614, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:50:43,488] [INFO] [timer.py:197:stop] 0/4374, RunningAvgSamplesPerSec=6.330295297779702, CurrSamplesPerSec=5.705242186997218, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:50:54,780] [INFO] [timer.py:197:stop] 0/4376, RunningAvgSamplesPerSec=6.330302984893042, CurrSamplesPerSec=5.711761879416802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:51:06,066] [INFO] [timer.py:197:stop] 0/4378, RunningAvgSamplesPerSec=6.3303119676765185, CurrSamplesPerSec=5.730619763654686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:51:17,354] [INFO] [logging.py:68:log_dist] [Rank 0] step=2190, skipped=5, lr=[6.2577777777777785e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:51:17,355] [INFO] [timer.py:197:stop] 0/4380, RunningAvgSamplesPerSec=6.330315521160135, CurrSamplesPerSec=5.717432966308704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:51:28,622] [INFO] [timer.py:197:stop] 0/4382, RunningAvgSamplesPerSec=6.330325204840329, CurrSamplesPerSec=5.717960792921026, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:51:39,891] [INFO] [timer.py:197:stop] 0/4384, RunningAvgSamplesPerSec=6.330339635750931, CurrSamplesPerSec=5.732306574514285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:51:51,192] [INFO] [timer.py:197:stop] 0/4386, RunningAvgSamplesPerSec=6.3303441565671585, CurrSamplesPerSec=5.7166807321598165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:52:02,549] [INFO] [timer.py:197:stop] 0/4388, RunningAvgSamplesPerSec=6.330334339923144, CurrSamplesPerSec=5.64699055579327, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:52:13,829] [INFO] [timer.py:197:stop] 0/4390, RunningAvgSamplesPerSec=6.330346790628633, CurrSamplesPerSec=5.7156687307214815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:52:25,123] [INFO] [timer.py:197:stop] 0/4392, RunningAvgSamplesPerSec=6.33035745595913, CurrSamplesPerSec=5.719217787639403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:52:36,425] [INFO] [timer.py:197:stop] 0/4394, RunningAvgSamplesPerSec=6.330364386815062, CurrSamplesPerSec=5.710929245018288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:52:47,718] [INFO] [timer.py:197:stop] 0/4396, RunningAvgSamplesPerSec=6.330374101398396, CurrSamplesPerSec=5.722247418954987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:52:58,994] [INFO] [timer.py:197:stop] 0/4398, RunningAvgSamplesPerSec=6.330389488345945, CurrSamplesPerSec=5.73566454879658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:53:10,356] [INFO] [logging.py:68:log_dist] [Rank 0] step=2200, skipped=5, lr=[6.235555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:53:10,358] [INFO] [timer.py:197:stop] 0/4400, RunningAvgSamplesPerSec=6.330381002027457, CurrSamplesPerSec=5.683259337924224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0028, 'learning_rate': 6.235555555555556e-06, 'epoch': 9.32} +[2022-12-17 03:53:21,684] [INFO] [timer.py:197:stop] 0/4402, RunningAvgSamplesPerSec=6.330380721761244, CurrSamplesPerSec=5.681286215532228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:53:32,959] [INFO] [timer.py:197:stop] 0/4404, RunningAvgSamplesPerSec=6.33039288991234, CurrSamplesPerSec=5.704690276657935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:53:44,300] [INFO] [timer.py:197:stop] 0/4406, RunningAvgSamplesPerSec=6.3303819555293055, CurrSamplesPerSec=5.672073625116918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:53:55,668] [INFO] [timer.py:197:stop] 0/4408, RunningAvgSamplesPerSec=6.3303853532268946, CurrSamplesPerSec=5.713735312329119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:54:06,948] [INFO] [timer.py:197:stop] 0/4410, RunningAvgSamplesPerSec=6.3303942961100095, CurrSamplesPerSec=5.709733942398946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:54:18,343] [INFO] [timer.py:197:stop] 0/4412, RunningAvgSamplesPerSec=6.330372825528667, CurrSamplesPerSec=5.6119057705684305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:54:29,681] [INFO] [timer.py:197:stop] 0/4414, RunningAvgSamplesPerSec=6.3303670020010205, CurrSamplesPerSec=5.667097441483019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:54:41,064] [INFO] [timer.py:197:stop] 0/4416, RunningAvgSamplesPerSec=6.330358021308729, CurrSamplesPerSec=5.664311884604425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:54:52,354] [INFO] [timer.py:197:stop] 0/4418, RunningAvgSamplesPerSec=6.330364103786946, CurrSamplesPerSec=5.71063547488767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:55:03,657] [INFO] [logging.py:68:log_dist] [Rank 0] step=2210, skipped=5, lr=[6.213333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:55:03,659] [INFO] [timer.py:197:stop] 0/4420, RunningAvgSamplesPerSec=6.330366350568987, CurrSamplesPerSec=5.696660631635473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:55:15,004] [INFO] [timer.py:197:stop] 0/4422, RunningAvgSamplesPerSec=6.330356961663918, CurrSamplesPerSec=5.6580722121687375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:55:26,283] [INFO] [timer.py:197:stop] 0/4424, RunningAvgSamplesPerSec=6.330366423816739, CurrSamplesPerSec=5.7198256510353795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:55:37,536] [INFO] [timer.py:197:stop] 0/4426, RunningAvgSamplesPerSec=6.330382510418656, CurrSamplesPerSec=5.738779356959358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:55:48,852] [INFO] [timer.py:197:stop] 0/4428, RunningAvgSamplesPerSec=6.330381786900965, CurrSamplesPerSec=5.713057246934883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:56:00,144] [INFO] [timer.py:197:stop] 0/4430, RunningAvgSamplesPerSec=6.330387543094171, CurrSamplesPerSec=5.70596473014713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:56:11,431] [INFO] [timer.py:197:stop] 0/4432, RunningAvgSamplesPerSec=6.3303949265506265, CurrSamplesPerSec=5.709284618611247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:56:22,718] [INFO] [timer.py:197:stop] 0/4434, RunningAvgSamplesPerSec=6.330398974040821, CurrSamplesPerSec=5.728504821701217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:56:34,005] [INFO] [timer.py:197:stop] 0/4436, RunningAvgSamplesPerSec=6.3304073938842675, CurrSamplesPerSec=5.695226960885154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:56:45,500] [INFO] [timer.py:197:stop] 0/4438, RunningAvgSamplesPerSec=6.330411289428148, CurrSamplesPerSec=5.701911251660377, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:56:56,822] [INFO] [logging.py:68:log_dist] [Rank 0] step=2220, skipped=5, lr=[6.191111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:56:56,823] [INFO] [timer.py:197:stop] 0/4440, RunningAvgSamplesPerSec=6.33041506366355, CurrSamplesPerSec=5.713683989698677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:57:08,107] [INFO] [timer.py:197:stop] 0/4442, RunningAvgSamplesPerSec=6.3304343171901, CurrSamplesPerSec=5.7385732499032756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:57:19,373] [INFO] [timer.py:197:stop] 0/4444, RunningAvgSamplesPerSec=6.330443822660566, CurrSamplesPerSec=5.722055426774207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:57:30,636] [INFO] [timer.py:197:stop] 0/4446, RunningAvgSamplesPerSec=6.3304531891264775, CurrSamplesPerSec=5.723357908539943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:57:41,949] [INFO] [timer.py:197:stop] 0/4448, RunningAvgSamplesPerSec=6.3304547739768315, CurrSamplesPerSec=5.717142910205345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:57:53,236] [INFO] [timer.py:197:stop] 0/4450, RunningAvgSamplesPerSec=6.33046078844013, CurrSamplesPerSec=5.701764705232637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0027, 'learning_rate': 6.18e-06, 'epoch': 9.43} +[2022-12-17 03:58:04,529] [INFO] [timer.py:197:stop] 0/4452, RunningAvgSamplesPerSec=6.330467659165346, CurrSamplesPerSec=5.713794419628813, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:58:15,810] [INFO] [timer.py:197:stop] 0/4454, RunningAvgSamplesPerSec=6.330472250504414, CurrSamplesPerSec=5.717310218572185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:58:27,136] [INFO] [timer.py:197:stop] 0/4456, RunningAvgSamplesPerSec=6.330470611831681, CurrSamplesPerSec=5.699990516842288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:58:38,446] [INFO] [timer.py:197:stop] 0/4458, RunningAvgSamplesPerSec=6.330472097937645, CurrSamplesPerSec=5.6944752430173935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:58:49,765] [INFO] [logging.py:68:log_dist] [Rank 0] step=2230, skipped=5, lr=[6.16888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 03:58:49,766] [INFO] [timer.py:197:stop] 0/4460, RunningAvgSamplesPerSec=6.330472808261691, CurrSamplesPerSec=5.685759101297652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:59:01,199] [INFO] [timer.py:197:stop] 0/4462, RunningAvgSamplesPerSec=6.330476629555121, CurrSamplesPerSec=5.714341767530068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:59:12,484] [INFO] [timer.py:197:stop] 0/4464, RunningAvgSamplesPerSec=6.330480789170642, CurrSamplesPerSec=5.678016814778323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:59:23,796] [INFO] [timer.py:197:stop] 0/4466, RunningAvgSamplesPerSec=6.330481948473897, CurrSamplesPerSec=5.708953135855733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:59:35,079] [INFO] [timer.py:197:stop] 0/4468, RunningAvgSamplesPerSec=6.33048619652435, CurrSamplesPerSec=5.698196869284114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:59:46,403] [INFO] [timer.py:197:stop] 0/4470, RunningAvgSamplesPerSec=6.330483432308348, CurrSamplesPerSec=5.68910446418724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 03:59:57,679] [INFO] [timer.py:197:stop] 0/4472, RunningAvgSamplesPerSec=6.330493710341828, CurrSamplesPerSec=5.72305016874372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:00:08,961] [INFO] [timer.py:197:stop] 0/4474, RunningAvgSamplesPerSec=6.330494027807299, CurrSamplesPerSec=5.6877512402808765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:00:20,267] [INFO] [timer.py:197:stop] 0/4476, RunningAvgSamplesPerSec=6.3304963388366415, CurrSamplesPerSec=5.690306825164165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:00:31,592] [INFO] [timer.py:197:stop] 0/4478, RunningAvgSamplesPerSec=6.330488847791333, CurrSamplesPerSec=5.673577202367607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:00:42,914] [INFO] [logging.py:68:log_dist] [Rank 0] step=2240, skipped=5, lr=[6.146666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:00:42,915] [INFO] [timer.py:197:stop] 0/4480, RunningAvgSamplesPerSec=6.330486086094699, CurrSamplesPerSec=5.688008913144727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:00:54,182] [INFO] [timer.py:197:stop] 0/4482, RunningAvgSamplesPerSec=6.330499265528688, CurrSamplesPerSec=5.71943688629642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:01:05,462] [INFO] [timer.py:197:stop] 0/4484, RunningAvgSamplesPerSec=6.330509348678946, CurrSamplesPerSec=5.71629215134095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:01:16,762] [INFO] [timer.py:197:stop] 0/4486, RunningAvgSamplesPerSec=6.330513088250893, CurrSamplesPerSec=5.699628889360823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:01:28,058] [INFO] [timer.py:197:stop] 0/4488, RunningAvgSamplesPerSec=6.330518727226009, CurrSamplesPerSec=5.707317898735957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:01:39,342] [INFO] [timer.py:197:stop] 0/4490, RunningAvgSamplesPerSec=6.330526565520092, CurrSamplesPerSec=5.702028978709898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:01:50,602] [INFO] [timer.py:197:stop] 0/4492, RunningAvgSamplesPerSec=6.33054246376338, CurrSamplesPerSec=5.729671796777712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:02:01,875] [INFO] [timer.py:197:stop] 0/4494, RunningAvgSamplesPerSec=6.330555609378127, CurrSamplesPerSec=5.725686430644263, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:02:13,375] [INFO] [timer.py:197:stop] 0/4496, RunningAvgSamplesPerSec=6.330563896608057, CurrSamplesPerSec=5.706466179325695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:02:24,676] [INFO] [timer.py:197:stop] 0/4498, RunningAvgSamplesPerSec=6.330571840505921, CurrSamplesPerSec=5.7039484213581675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:02:35,974] [INFO] [logging.py:68:log_dist] [Rank 0] step=2250, skipped=5, lr=[6.124444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:02:35,976] [INFO] [timer.py:197:stop] 0/4500, RunningAvgSamplesPerSec=6.33057808605072, CurrSamplesPerSec=5.700222912132732, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0029, 'learning_rate': 6.124444444444445e-06, 'epoch': 9.53} +[2022-12-17 04:02:47,309] [INFO] [timer.py:197:stop] 0/4502, RunningAvgSamplesPerSec=6.330590813929422, CurrSamplesPerSec=5.704254595111555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:02:58,592] [INFO] [timer.py:197:stop] 0/4504, RunningAvgSamplesPerSec=6.330599689229134, CurrSamplesPerSec=5.697111839696128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:03:09,876] [INFO] [timer.py:197:stop] 0/4506, RunningAvgSamplesPerSec=6.330611905513456, CurrSamplesPerSec=5.717914753402765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:03:21,200] [INFO] [timer.py:197:stop] 0/4508, RunningAvgSamplesPerSec=6.330605019172982, CurrSamplesPerSec=5.673138107069762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:03:32,518] [INFO] [timer.py:197:stop] 0/4510, RunningAvgSamplesPerSec=6.330605752558324, CurrSamplesPerSec=5.695185878279826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:03:43,803] [INFO] [timer.py:197:stop] 0/4512, RunningAvgSamplesPerSec=6.330615522766701, CurrSamplesPerSec=5.706228665318071, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:03:55,121] [INFO] [timer.py:197:stop] 0/4514, RunningAvgSamplesPerSec=6.3306133846847645, CurrSamplesPerSec=5.685259358026691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:04:06,451] [INFO] [timer.py:197:stop] 0/4516, RunningAvgSamplesPerSec=6.330608909026556, CurrSamplesPerSec=5.700777106120688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:04:17,795] [INFO] [timer.py:197:stop] 0/4518, RunningAvgSamplesPerSec=6.33060024301429, CurrSamplesPerSec=5.657230118958954, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:04:29,104] [INFO] [logging.py:68:log_dist] [Rank 0] step=2260, skipped=5, lr=[6.102222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:04:29,106] [INFO] [timer.py:197:stop] 0/4520, RunningAvgSamplesPerSec=6.330601376732257, CurrSamplesPerSec=5.713666476974156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:04:40,371] [INFO] [timer.py:197:stop] 0/4522, RunningAvgSamplesPerSec=6.330606514763284, CurrSamplesPerSec=5.709888914787506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:04:51,644] [INFO] [timer.py:197:stop] 0/4524, RunningAvgSamplesPerSec=6.330609315895327, CurrSamplesPerSec=5.686145950865563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:05:02,950] [INFO] [timer.py:197:stop] 0/4526, RunningAvgSamplesPerSec=6.33061151512052, CurrSamplesPerSec=5.720672585516275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:05:14,252] [INFO] [timer.py:197:stop] 0/4528, RunningAvgSamplesPerSec=6.330615066568691, CurrSamplesPerSec=5.714858803511764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:05:25,506] [INFO] [timer.py:197:stop] 0/4530, RunningAvgSamplesPerSec=6.330622922685323, CurrSamplesPerSec=5.71164496542443, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:05:36,782] [INFO] [timer.py:197:stop] 0/4532, RunningAvgSamplesPerSec=6.330625751387737, CurrSamplesPerSec=5.687373812170686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:05:48,077] [INFO] [timer.py:197:stop] 0/4534, RunningAvgSamplesPerSec=6.3306346663352, CurrSamplesPerSec=5.712731891063314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:05:59,367] [INFO] [timer.py:197:stop] 0/4536, RunningAvgSamplesPerSec=6.330640941125124, CurrSamplesPerSec=5.700456536914458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:06:10,640] [INFO] [timer.py:197:stop] 0/4538, RunningAvgSamplesPerSec=6.330652404193294, CurrSamplesPerSec=5.715381530744073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:06:21,900] [INFO] [logging.py:68:log_dist] [Rank 0] step=2270, skipped=5, lr=[6.08e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:06:21,902] [INFO] [timer.py:197:stop] 0/4540, RunningAvgSamplesPerSec=6.330666901885576, CurrSamplesPerSec=5.737028654425584, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:06:33,224] [INFO] [timer.py:197:stop] 0/4542, RunningAvgSamplesPerSec=6.330665209915535, CurrSamplesPerSec=5.682952526304001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:06:44,491] [INFO] [timer.py:197:stop] 0/4544, RunningAvgSamplesPerSec=6.330674842163236, CurrSamplesPerSec=5.707736329246788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:06:55,804] [INFO] [timer.py:197:stop] 0/4546, RunningAvgSamplesPerSec=6.330674269839647, CurrSamplesPerSec=5.701357321521387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:07:07,141] [INFO] [timer.py:197:stop] 0/4548, RunningAvgSamplesPerSec=6.330686947984364, CurrSamplesPerSec=5.721433919111742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:07:18,412] [INFO] [timer.py:197:stop] 0/4550, RunningAvgSamplesPerSec=6.330698464805578, CurrSamplesPerSec=5.721675871507792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0036, 'learning_rate': 6.06888888888889e-06, 'epoch': 9.64} +[2022-12-17 04:07:29,707] [INFO] [timer.py:197:stop] 0/4552, RunningAvgSamplesPerSec=6.330710496101829, CurrSamplesPerSec=5.720422916124233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:07:41,068] [INFO] [timer.py:197:stop] 0/4554, RunningAvgSamplesPerSec=6.3307208209518695, CurrSamplesPerSec=5.725109311897351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:07:52,440] [INFO] [timer.py:197:stop] 0/4556, RunningAvgSamplesPerSec=6.330723336313207, CurrSamplesPerSec=5.692474529011012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:08:03,875] [INFO] [timer.py:197:stop] 0/4558, RunningAvgSamplesPerSec=6.330714611360339, CurrSamplesPerSec=5.664903350183139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:08:15,227] [INFO] [logging.py:68:log_dist] [Rank 0] step=2280, skipped=5, lr=[6.057777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:08:15,228] [INFO] [timer.py:197:stop] 0/4560, RunningAvgSamplesPerSec=6.330726531708481, CurrSamplesPerSec=5.729612115939908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:08:26,573] [INFO] [timer.py:197:stop] 0/4562, RunningAvgSamplesPerSec=6.330743349268332, CurrSamplesPerSec=5.720252987606335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:08:37,916] [INFO] [timer.py:197:stop] 0/4564, RunningAvgSamplesPerSec=6.33075457470811, CurrSamplesPerSec=5.7212497855880375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:08:49,272] [INFO] [timer.py:197:stop] 0/4566, RunningAvgSamplesPerSec=6.330758780378043, CurrSamplesPerSec=5.6899461841189565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:09:00,622] [INFO] [timer.py:197:stop] 0/4568, RunningAvgSamplesPerSec=6.330764615495463, CurrSamplesPerSec=5.712111191518389, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:09:11,925] [INFO] [timer.py:197:stop] 0/4570, RunningAvgSamplesPerSec=6.330760727916034, CurrSamplesPerSec=5.6708976453672735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:09:23,181] [INFO] [timer.py:197:stop] 0/4572, RunningAvgSamplesPerSec=6.330776742335515, CurrSamplesPerSec=5.7173092444048805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:09:34,546] [INFO] [timer.py:197:stop] 0/4574, RunningAvgSamplesPerSec=6.33078097114606, CurrSamplesPerSec=5.702969516215391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:09:45,942] [INFO] [timer.py:197:stop] 0/4576, RunningAvgSamplesPerSec=6.330784055681368, CurrSamplesPerSec=5.708197790402674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:09:57,327] [INFO] [timer.py:197:stop] 0/4578, RunningAvgSamplesPerSec=6.330790606004829, CurrSamplesPerSec=5.698967234714678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:10:08,671] [INFO] [logging.py:68:log_dist] [Rank 0] step=2290, skipped=5, lr=[6.0355555555555555e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:10:08,672] [INFO] [timer.py:197:stop] 0/4580, RunningAvgSamplesPerSec=6.33079115086289, CurrSamplesPerSec=5.6993892819688305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:10:20,063] [INFO] [timer.py:197:stop] 0/4582, RunningAvgSamplesPerSec=6.330792257325494, CurrSamplesPerSec=5.690597301088274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:10:31,383] [INFO] [timer.py:197:stop] 0/4584, RunningAvgSamplesPerSec=6.33079201294549, CurrSamplesPerSec=5.686121861559865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:10:42,919] [INFO] [timer.py:197:stop] 0/4586, RunningAvgSamplesPerSec=6.33079594497458, CurrSamplesPerSec=5.688110397827378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:10:54,250] [INFO] [timer.py:197:stop] 0/4588, RunningAvgSamplesPerSec=6.330806829957438, CurrSamplesPerSec=5.736196727782429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:11:05,530] [INFO] [timer.py:197:stop] 0/4590, RunningAvgSamplesPerSec=6.330812007815817, CurrSamplesPerSec=5.71894777590225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:11:16,800] [INFO] [timer.py:197:stop] 0/4592, RunningAvgSamplesPerSec=6.330825381671413, CurrSamplesPerSec=5.726484770543781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:11:28,344] [INFO] [timer.py:197:stop] 0/4594, RunningAvgSamplesPerSec=6.3308329510418035, CurrSamplesPerSec=5.723417459157915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:11:39,647] [INFO] [timer.py:197:stop] 0/4596, RunningAvgSamplesPerSec=6.330836708797295, CurrSamplesPerSec=5.699751121093182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:11:51,143] [INFO] [timer.py:197:stop] 0/4598, RunningAvgSamplesPerSec=6.330848828375051, CurrSamplesPerSec=5.73858281883173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:12:02,491] [INFO] [logging.py:68:log_dist] [Rank 0] step=2300, skipped=5, lr=[6.013333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:12:02,493] [INFO] [timer.py:197:stop] 0/4600, RunningAvgSamplesPerSec=6.330861170154268, CurrSamplesPerSec=5.724425614623796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0032, 'learning_rate': 6.013333333333335e-06, 'epoch': 9.75} +[2022-12-17 04:12:13,898] [INFO] [timer.py:197:stop] 0/4602, RunningAvgSamplesPerSec=6.330832410767395, CurrSamplesPerSec=5.5755094488871615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:12:25,351] [INFO] [timer.py:197:stop] 0/4604, RunningAvgSamplesPerSec=6.3308431891690775, CurrSamplesPerSec=5.727538489991829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:12:36,684] [INFO] [timer.py:197:stop] 0/4606, RunningAvgSamplesPerSec=6.330837902683982, CurrSamplesPerSec=5.665191716507016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:12:48,317] [INFO] [timer.py:197:stop] 0/4608, RunningAvgSamplesPerSec=6.330755379814556, CurrSamplesPerSec=5.394187377355717, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:12:59,623] [INFO] [timer.py:197:stop] 0/4610, RunningAvgSamplesPerSec=6.330756819466149, CurrSamplesPerSec=5.6961722661655605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:13:10,941] [INFO] [timer.py:197:stop] 0/4612, RunningAvgSamplesPerSec=6.330756151419777, CurrSamplesPerSec=5.695062150719643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:13:22,525] [INFO] [timer.py:197:stop] 0/4614, RunningAvgSamplesPerSec=6.330679185520829, CurrSamplesPerSec=5.406924178710288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:13:33,814] [INFO] [timer.py:197:stop] 0/4616, RunningAvgSamplesPerSec=6.330686433323466, CurrSamplesPerSec=5.6993048191151345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:13:45,112] [INFO] [timer.py:197:stop] 0/4618, RunningAvgSamplesPerSec=6.330687694288799, CurrSamplesPerSec=5.700686306802098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:13:56,440] [INFO] [logging.py:68:log_dist] [Rank 0] step=2310, skipped=5, lr=[5.991111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:13:56,442] [INFO] [timer.py:197:stop] 0/4620, RunningAvgSamplesPerSec=6.3306887015343865, CurrSamplesPerSec=5.662078148631941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:14:07,726] [INFO] [timer.py:197:stop] 0/4622, RunningAvgSamplesPerSec=6.3306965751068045, CurrSamplesPerSec=5.7048692235839935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:14:19,039] [INFO] [timer.py:197:stop] 0/4624, RunningAvgSamplesPerSec=6.33070326498081, CurrSamplesPerSec=5.682439803730237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:14:30,421] [INFO] [timer.py:197:stop] 0/4626, RunningAvgSamplesPerSec=6.330692695475473, CurrSamplesPerSec=5.638133414390749, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:14:41,715] [INFO] [timer.py:197:stop] 0/4628, RunningAvgSamplesPerSec=6.330699691584441, CurrSamplesPerSec=5.717189667889637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:14:53,008] [INFO] [timer.py:197:stop] 0/4630, RunningAvgSamplesPerSec=6.330705696490953, CurrSamplesPerSec=5.695336920186152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:15:04,352] [INFO] [timer.py:197:stop] 0/4632, RunningAvgSamplesPerSec=6.330698305517842, CurrSamplesPerSec=5.6486805486610585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:15:15,647] [INFO] [timer.py:197:stop] 0/4634, RunningAvgSamplesPerSec=6.330704188121081, CurrSamplesPerSec=5.720073561851484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:15:27,219] [INFO] [timer.py:197:stop] 0/4636, RunningAvgSamplesPerSec=6.330706098111885, CurrSamplesPerSec=5.6777554829107535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:15:38,794] [INFO] [timer.py:197:stop] 0/4638, RunningAvgSamplesPerSec=6.3307191696421805, CurrSamplesPerSec=5.740637442241396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:15:50,098] [INFO] [logging.py:68:log_dist] [Rank 0] step=2320, skipped=5, lr=[5.96888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:15:50,100] [INFO] [timer.py:197:stop] 0/4640, RunningAvgSamplesPerSec=6.330714300595713, CurrSamplesPerSec=5.690648692358206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:16:01,486] [INFO] [timer.py:197:stop] 0/4642, RunningAvgSamplesPerSec=6.3306947193985605, CurrSamplesPerSec=5.607131295528686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:16:12,790] [INFO] [timer.py:197:stop] 0/4644, RunningAvgSamplesPerSec=6.3306961430292, CurrSamplesPerSec=5.696737762353815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:16:24,098] [INFO] [timer.py:197:stop] 0/4646, RunningAvgSamplesPerSec=6.330697606070938, CurrSamplesPerSec=5.699602265322378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:16:35,405] [INFO] [timer.py:197:stop] 0/4648, RunningAvgSamplesPerSec=6.330698876455062, CurrSamplesPerSec=5.714086811994762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:16:46,764] [INFO] [timer.py:197:stop] 0/4650, RunningAvgSamplesPerSec=6.330690469186335, CurrSamplesPerSec=5.701085118955992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0039, 'learning_rate': 5.957777777777778e-06, 'epoch': 9.85} +[2022-12-17 04:16:58,107] [INFO] [timer.py:197:stop] 0/4652, RunningAvgSamplesPerSec=6.3306828500294925, CurrSamplesPerSec=5.6933512985319545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:17:09,667] [INFO] [timer.py:197:stop] 0/4654, RunningAvgSamplesPerSec=6.330616710020938, CurrSamplesPerSec=5.467337482556833, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:17:21,103] [INFO] [timer.py:197:stop] 0/4656, RunningAvgSamplesPerSec=6.330615721908283, CurrSamplesPerSec=5.708993203154572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:17:32,573] [INFO] [timer.py:197:stop] 0/4658, RunningAvgSamplesPerSec=6.33060873986457, CurrSamplesPerSec=5.676477032949712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:17:44,017] [INFO] [logging.py:68:log_dist] [Rank 0] step=2330, skipped=5, lr=[5.946666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:17:44,034] [INFO] [timer.py:197:stop] 0/4660, RunningAvgSamplesPerSec=6.330570240212617, CurrSamplesPerSec=5.560654459755998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:17:55,547] [INFO] [timer.py:197:stop] 0/4662, RunningAvgSamplesPerSec=6.33057605313111, CurrSamplesPerSec=5.711914774167443, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:18:07,012] [INFO] [timer.py:197:stop] 0/4664, RunningAvgSamplesPerSec=6.330578144373511, CurrSamplesPerSec=5.7113858757955525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:18:18,541] [INFO] [timer.py:197:stop] 0/4666, RunningAvgSamplesPerSec=6.330524070412179, CurrSamplesPerSec=5.487751174558908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:18:30,062] [INFO] [timer.py:197:stop] 0/4668, RunningAvgSamplesPerSec=6.33053034689637, CurrSamplesPerSec=5.705891958199446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:18:41,533] [INFO] [timer.py:197:stop] 0/4670, RunningAvgSamplesPerSec=6.330536314630159, CurrSamplesPerSec=5.701401641659355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:18:52,850] [INFO] [timer.py:197:stop] 0/4672, RunningAvgSamplesPerSec=6.330536460115227, CurrSamplesPerSec=5.671820509831005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:19:04,157] [INFO] [timer.py:197:stop] 0/4674, RunningAvgSamplesPerSec=6.330554419779344, CurrSamplesPerSec=5.738497926375647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:19:15,446] [INFO] [timer.py:197:stop] 0/4676, RunningAvgSamplesPerSec=6.330560749126159, CurrSamplesPerSec=5.716416559625785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:19:26,970] [INFO] [timer.py:197:stop] 0/4678, RunningAvgSamplesPerSec=6.330504478971126, CurrSamplesPerSec=5.483726107472392, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:19:38,276] [INFO] [logging.py:68:log_dist] [Rank 0] step=2340, skipped=5, lr=[5.924444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:19:38,277] [INFO] [timer.py:197:stop] 0/4680, RunningAvgSamplesPerSec=6.3305070302567445, CurrSamplesPerSec=5.6899080722178645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:19:49,582] [INFO] [timer.py:197:stop] 0/4682, RunningAvgSamplesPerSec=6.330510689869193, CurrSamplesPerSec=5.6991613106084875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:20:01,233] [INFO] [timer.py:197:stop] 0/4684, RunningAvgSamplesPerSec=6.330422201677251, CurrSamplesPerSec=5.354158658429851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:20:12,510] [INFO] [timer.py:197:stop] 0/4686, RunningAvgSamplesPerSec=6.330433264181468, CurrSamplesPerSec=5.7141668480667915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:20:23,801] [INFO] [timer.py:197:stop] 0/4688, RunningAvgSamplesPerSec=6.330442375056253, CurrSamplesPerSec=5.719161005075831, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:20:35,398] [INFO] [timer.py:197:stop] 0/4690, RunningAvgSamplesPerSec=6.330370208120584, CurrSamplesPerSec=5.415934115753085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:20:46,670] [INFO] [timer.py:197:stop] 0/4692, RunningAvgSamplesPerSec=6.330381828816718, CurrSamplesPerSec=5.713629262790996, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:20:58,159] [INFO] [timer.py:197:stop] 0/4694, RunningAvgSamplesPerSec=6.330391647125279, CurrSamplesPerSec=5.715702320400646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:21:09,552] [INFO] [timer.py:197:stop] 0/4696, RunningAvgSamplesPerSec=6.330390667727303, CurrSamplesPerSec=5.727730361440755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:21:20,834] [INFO] [timer.py:197:stop] 0/4698, RunningAvgSamplesPerSec=6.33039563285138, CurrSamplesPerSec=5.694713229294793, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:21:32,212] [INFO] [logging.py:68:log_dist] [Rank 0] step=2350, skipped=5, lr=[5.902222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:21:32,214] [INFO] [timer.py:197:stop] 0/4700, RunningAvgSamplesPerSec=6.330408088819912, CurrSamplesPerSec=5.717672875415328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0035, 'learning_rate': 5.902222222222223e-06, 'epoch': 9.96} +[2022-12-17 04:21:43,841] [INFO] [timer.py:197:stop] 0/4702, RunningAvgSamplesPerSec=6.330403336990846, CurrSamplesPerSec=5.721540014669421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:21:55,098] [INFO] [timer.py:197:stop] 0/4704, RunningAvgSamplesPerSec=6.330418358984936, CurrSamplesPerSec=5.722686341466305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:22:06,354] [INFO] [timer.py:197:stop] 0/4706, RunningAvgSamplesPerSec=6.330429863484377, CurrSamplesPerSec=5.722186428937103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:22:17,857] [INFO] [timer.py:197:stop] 0/4708, RunningAvgSamplesPerSec=6.3303796798625305, CurrSamplesPerSec=5.741505533618278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:22:29,135] [INFO] [timer.py:197:stop] 0/4710, RunningAvgSamplesPerSec=6.330390373733256, CurrSamplesPerSec=5.729053768988964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:22:40,498] [INFO] [timer.py:197:stop] 0/4712, RunningAvgSamplesPerSec=6.330378569329705, CurrSamplesPerSec=5.644133107749217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:22:51,988] [INFO] [timer.py:197:stop] 0/4714, RunningAvgSamplesPerSec=6.330381143253887, CurrSamplesPerSec=5.696042210670469, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:23:03,456] [INFO] [timer.py:197:stop] 0/4716, RunningAvgSamplesPerSec=6.330393422869406, CurrSamplesPerSec=5.716793469438937, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:23:15,099] [INFO] [timer.py:197:stop] 0/4718, RunningAvgSamplesPerSec=6.33030756039602, CurrSamplesPerSec=5.366892319611965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:23:23,834] [INFO] [logging.py:68:log_dist] [Rank 0] step=2360, skipped=5, lr=[5.8800000000000005e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:23:23,835] [INFO] [timer.py:197:stop] 0/4720, RunningAvgSamplesPerSec=6.330972527543288, CurrSamplesPerSec=10.234348433383115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:23:35,092] [INFO] [timer.py:197:stop] 0/4722, RunningAvgSamplesPerSec=6.330985658534921, CurrSamplesPerSec=5.727015247337471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:23:46,448] [INFO] [timer.py:197:stop] 0/4724, RunningAvgSamplesPerSec=6.330978732945515, CurrSamplesPerSec=5.661985472808679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:23:58,056] [INFO] [timer.py:197:stop] 0/4726, RunningAvgSamplesPerSec=6.3309908988236305, CurrSamplesPerSec=5.7250834260977195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:24:09,589] [INFO] [timer.py:197:stop] 0/4728, RunningAvgSamplesPerSec=6.331000839362033, CurrSamplesPerSec=5.721300756463225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:24:20,996] [INFO] [timer.py:197:stop] 0/4730, RunningAvgSamplesPerSec=6.3309785423666876, CurrSamplesPerSec=5.590156459974123, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:24:32,356] [INFO] [timer.py:197:stop] 0/4732, RunningAvgSamplesPerSec=6.330984497430182, CurrSamplesPerSec=5.707438276169677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:24:43,838] [INFO] [timer.py:197:stop] 0/4734, RunningAvgSamplesPerSec=6.33099808507498, CurrSamplesPerSec=5.725624878794464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:24:55,147] [INFO] [timer.py:197:stop] 0/4736, RunningAvgSamplesPerSec=6.330995778328434, CurrSamplesPerSec=5.663898840429798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:25:06,588] [INFO] [timer.py:197:stop] 0/4738, RunningAvgSamplesPerSec=6.331004069060532, CurrSamplesPerSec=5.698754540780964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:25:18,033] [INFO] [logging.py:68:log_dist] [Rank 0] step=2370, skipped=5, lr=[5.857777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:25:18,035] [INFO] [timer.py:197:stop] 0/4740, RunningAvgSamplesPerSec=6.331010722188499, CurrSamplesPerSec=5.697760243937208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:25:29,349] [INFO] [timer.py:197:stop] 0/4742, RunningAvgSamplesPerSec=6.331010941877606, CurrSamplesPerSec=5.6695437279093355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:25:40,631] [INFO] [timer.py:197:stop] 0/4744, RunningAvgSamplesPerSec=6.331019301537191, CurrSamplesPerSec=5.703297153907961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:25:51,912] [INFO] [timer.py:197:stop] 0/4746, RunningAvgSamplesPerSec=6.331027374547529, CurrSamplesPerSec=5.724606778497856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:26:03,303] [INFO] [timer.py:197:stop] 0/4748, RunningAvgSamplesPerSec=6.331006687700225, CurrSamplesPerSec=5.595459055112661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:26:14,592] [INFO] [timer.py:197:stop] 0/4750, RunningAvgSamplesPerSec=6.331013643470817, CurrSamplesPerSec=5.714340551085141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.003, 'learning_rate': 5.846666666666667e-06, 'epoch': 10.06} +[2022-12-17 04:26:25,876] [INFO] [timer.py:197:stop] 0/4752, RunningAvgSamplesPerSec=6.3310215455665695, CurrSamplesPerSec=5.689917238324662, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:26:37,436] [INFO] [timer.py:197:stop] 0/4754, RunningAvgSamplesPerSec=6.330957040473491, CurrSamplesPerSec=5.434912660706087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:26:48,726] [INFO] [timer.py:197:stop] 0/4756, RunningAvgSamplesPerSec=6.330964057867343, CurrSamplesPerSec=5.710236783132878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:27:00,107] [INFO] [timer.py:197:stop] 0/4758, RunningAvgSamplesPerSec=6.3309677032167375, CurrSamplesPerSec=5.715751001795795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:27:11,573] [INFO] [logging.py:68:log_dist] [Rank 0] step=2380, skipped=5, lr=[5.8355555555555565e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:27:11,575] [INFO] [timer.py:197:stop] 0/4760, RunningAvgSamplesPerSec=6.330973467975903, CurrSamplesPerSec=5.680981781017513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:27:22,902] [INFO] [timer.py:197:stop] 0/4762, RunningAvgSamplesPerSec=6.3309707590843685, CurrSamplesPerSec=5.6827352515239316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:27:34,292] [INFO] [timer.py:197:stop] 0/4764, RunningAvgSamplesPerSec=6.330987778779006, CurrSamplesPerSec=5.753313279983759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:27:45,702] [INFO] [timer.py:197:stop] 0/4766, RunningAvgSamplesPerSec=6.33099289520879, CurrSamplesPerSec=5.73214793479881, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:27:57,013] [INFO] [timer.py:197:stop] 0/4768, RunningAvgSamplesPerSec=6.33099395228108, CurrSamplesPerSec=5.686713794908089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:28:08,481] [INFO] [timer.py:197:stop] 0/4770, RunningAvgSamplesPerSec=6.3310024447366215, CurrSamplesPerSec=5.73548121382306, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:28:19,983] [INFO] [timer.py:197:stop] 0/4772, RunningAvgSamplesPerSec=6.330988706634111, CurrSamplesPerSec=5.693727588150975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:28:31,256] [INFO] [timer.py:197:stop] 0/4774, RunningAvgSamplesPerSec=6.330996263719724, CurrSamplesPerSec=5.714685798939968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:28:42,516] [INFO] [timer.py:197:stop] 0/4776, RunningAvgSamplesPerSec=6.331011640842076, CurrSamplesPerSec=5.7397435947875985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:28:53,854] [INFO] [timer.py:197:stop] 0/4778, RunningAvgSamplesPerSec=6.331007745299911, CurrSamplesPerSec=5.724216387060134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:29:05,156] [INFO] [logging.py:68:log_dist] [Rank 0] step=2390, skipped=5, lr=[5.813333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:29:05,158] [INFO] [timer.py:197:stop] 0/4780, RunningAvgSamplesPerSec=6.331010742488278, CurrSamplesPerSec=5.699064996979088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:29:16,468] [INFO] [timer.py:197:stop] 0/4782, RunningAvgSamplesPerSec=6.331012239137666, CurrSamplesPerSec=5.716506643368661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:29:27,729] [INFO] [timer.py:197:stop] 0/4784, RunningAvgSamplesPerSec=6.331018580974164, CurrSamplesPerSec=5.714587743334454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:29:39,013] [INFO] [timer.py:197:stop] 0/4786, RunningAvgSamplesPerSec=6.331022981960946, CurrSamplesPerSec=5.702331554684467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:29:50,326] [INFO] [timer.py:197:stop] 0/4788, RunningAvgSamplesPerSec=6.331019901591877, CurrSamplesPerSec=5.690069448256989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:30:01,604] [INFO] [timer.py:197:stop] 0/4790, RunningAvgSamplesPerSec=6.331029794130788, CurrSamplesPerSec=5.716243947629426, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:30:12,897] [INFO] [timer.py:197:stop] 0/4792, RunningAvgSamplesPerSec=6.331036251624487, CurrSamplesPerSec=5.719949238107081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:30:24,158] [INFO] [timer.py:197:stop] 0/4794, RunningAvgSamplesPerSec=6.331043100155323, CurrSamplesPerSec=5.702531432327052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:30:35,431] [INFO] [timer.py:197:stop] 0/4796, RunningAvgSamplesPerSec=6.331050018922631, CurrSamplesPerSec=5.6975415935642415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:30:46,826] [INFO] [timer.py:197:stop] 0/4798, RunningAvgSamplesPerSec=6.33102938541137, CurrSamplesPerSec=5.688286859627258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:30:58,154] [INFO] [logging.py:68:log_dist] [Rank 0] step=2400, skipped=5, lr=[5.791111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:30:58,156] [INFO] [timer.py:197:stop] 0/4800, RunningAvgSamplesPerSec=6.331025671102678, CurrSamplesPerSec=5.684242320700877, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0021, 'learning_rate': 5.791111111111112e-06, 'epoch': 10.17} +[2022-12-17 04:31:09,453] [INFO] [timer.py:197:stop] 0/4802, RunningAvgSamplesPerSec=6.331027120213348, CurrSamplesPerSec=5.682597147509798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:31:20,797] [INFO] [timer.py:197:stop] 0/4804, RunningAvgSamplesPerSec=6.331016437845667, CurrSamplesPerSec=5.691636164035205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:31:32,093] [INFO] [timer.py:197:stop] 0/4806, RunningAvgSamplesPerSec=6.331014511201373, CurrSamplesPerSec=5.683533932334821, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:31:43,425] [INFO] [timer.py:197:stop] 0/4808, RunningAvgSamplesPerSec=6.331017018483959, CurrSamplesPerSec=5.686735720732231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:31:55,131] [INFO] [timer.py:197:stop] 0/4810, RunningAvgSamplesPerSec=6.331027561745868, CurrSamplesPerSec=5.721124191374545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:32:06,778] [INFO] [timer.py:197:stop] 0/4812, RunningAvgSamplesPerSec=6.331032378578249, CurrSamplesPerSec=5.701915854066887, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:32:18,288] [INFO] [timer.py:197:stop] 0/4814, RunningAvgSamplesPerSec=6.330999850169187, CurrSamplesPerSec=5.556238848627807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:32:29,583] [INFO] [timer.py:197:stop] 0/4816, RunningAvgSamplesPerSec=6.331008162515726, CurrSamplesPerSec=5.728340769234183, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:32:40,915] [INFO] [timer.py:197:stop] 0/4818, RunningAvgSamplesPerSec=6.331002794289343, CurrSamplesPerSec=5.681108264065701, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:32:52,634] [INFO] [logging.py:68:log_dist] [Rank 0] step=2410, skipped=5, lr=[5.768888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:32:52,636] [INFO] [timer.py:197:stop] 0/4820, RunningAvgSamplesPerSec=6.330896323352138, CurrSamplesPerSec=5.290471094035281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:33:03,977] [INFO] [timer.py:197:stop] 0/4822, RunningAvgSamplesPerSec=6.330893064737957, CurrSamplesPerSec=5.677873415545301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:33:15,309] [INFO] [timer.py:197:stop] 0/4824, RunningAvgSamplesPerSec=6.330895551737563, CurrSamplesPerSec=5.6946078846918695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:33:26,713] [INFO] [timer.py:197:stop] 0/4826, RunningAvgSamplesPerSec=6.330872884371823, CurrSamplesPerSec=5.635442266683619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:33:37,998] [INFO] [timer.py:197:stop] 0/4828, RunningAvgSamplesPerSec=6.33088129819278, CurrSamplesPerSec=5.694139921496582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:33:49,282] [INFO] [timer.py:197:stop] 0/4830, RunningAvgSamplesPerSec=6.330889321460371, CurrSamplesPerSec=5.706906323939215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:34:00,782] [INFO] [timer.py:197:stop] 0/4832, RunningAvgSamplesPerSec=6.330877685241006, CurrSamplesPerSec=5.637208456846233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:34:12,038] [INFO] [timer.py:197:stop] 0/4834, RunningAvgSamplesPerSec=6.330888486726185, CurrSamplesPerSec=5.713096642313446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:34:23,435] [INFO] [timer.py:197:stop] 0/4836, RunningAvgSamplesPerSec=6.330895314332208, CurrSamplesPerSec=5.717684566931951, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:34:34,736] [INFO] [timer.py:197:stop] 0/4838, RunningAvgSamplesPerSec=6.330900525193966, CurrSamplesPerSec=5.68793563429523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:34:46,005] [INFO] [logging.py:68:log_dist] [Rank 0] step=2420, skipped=5, lr=[5.746666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:34:46,007] [INFO] [timer.py:197:stop] 0/4840, RunningAvgSamplesPerSec=6.33091151618933, CurrSamplesPerSec=5.7051672509014955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:34:57,541] [INFO] [timer.py:197:stop] 0/4842, RunningAvgSamplesPerSec=6.330914801125688, CurrSamplesPerSec=5.700826744304333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:35:09,022] [INFO] [timer.py:197:stop] 0/4844, RunningAvgSamplesPerSec=6.3309054184735265, CurrSamplesPerSec=5.671777607111656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:35:20,280] [INFO] [timer.py:197:stop] 0/4846, RunningAvgSamplesPerSec=6.330915487625754, CurrSamplesPerSec=5.706281552323623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:35:31,871] [INFO] [timer.py:197:stop] 0/4848, RunningAvgSamplesPerSec=6.330914274265737, CurrSamplesPerSec=5.6780453994108475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:35:43,422] [INFO] [timer.py:197:stop] 0/4850, RunningAvgSamplesPerSec=6.33090621324211, CurrSamplesPerSec=5.731395739761191, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0021, 'learning_rate': 5.735555555555557e-06, 'epoch': 10.28} +[2022-12-17 04:35:54,655] [INFO] [timer.py:197:stop] 0/4852, RunningAvgSamplesPerSec=6.33092348176634, CurrSamplesPerSec=5.737195166807862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:36:05,945] [INFO] [timer.py:197:stop] 0/4854, RunningAvgSamplesPerSec=6.33093041408174, CurrSamplesPerSec=5.719320876462623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:36:17,387] [INFO] [timer.py:197:stop] 0/4856, RunningAvgSamplesPerSec=6.330920262213994, CurrSamplesPerSec=5.719814194516181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:36:28,701] [INFO] [timer.py:197:stop] 0/4858, RunningAvgSamplesPerSec=6.330922055789028, CurrSamplesPerSec=5.686120416208014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:36:40,001] [INFO] [logging.py:68:log_dist] [Rank 0] step=2430, skipped=5, lr=[5.724444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:36:40,004] [INFO] [timer.py:197:stop] 0/4860, RunningAvgSamplesPerSec=6.330924720708107, CurrSamplesPerSec=5.714667550123298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:36:51,672] [INFO] [timer.py:197:stop] 0/4862, RunningAvgSamplesPerSec=6.3308836746394785, CurrSamplesPerSec=5.716470122590163, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:37:02,940] [INFO] [timer.py:197:stop] 0/4864, RunningAvgSamplesPerSec=6.330895535122308, CurrSamplesPerSec=5.733101619045565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:37:14,232] [INFO] [timer.py:197:stop] 0/4866, RunningAvgSamplesPerSec=6.330901820379269, CurrSamplesPerSec=5.705878131739231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:37:25,508] [INFO] [timer.py:197:stop] 0/4868, RunningAvgSamplesPerSec=6.33090794129048, CurrSamplesPerSec=5.7053280385360585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:37:36,804] [INFO] [timer.py:197:stop] 0/4870, RunningAvgSamplesPerSec=6.330909647225291, CurrSamplesPerSec=5.696581568772256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:37:48,184] [INFO] [timer.py:197:stop] 0/4872, RunningAvgSamplesPerSec=6.330889102496775, CurrSamplesPerSec=5.614864792503346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:37:59,850] [INFO] [timer.py:197:stop] 0/4874, RunningAvgSamplesPerSec=6.330887659503165, CurrSamplesPerSec=5.7011781105036325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:38:11,423] [INFO] [timer.py:197:stop] 0/4876, RunningAvgSamplesPerSec=6.330893858924424, CurrSamplesPerSec=5.698189611806278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:38:22,772] [INFO] [timer.py:197:stop] 0/4878, RunningAvgSamplesPerSec=6.330885528518763, CurrSamplesPerSec=5.673804330756011, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:38:34,301] [INFO] [logging.py:68:log_dist] [Rank 0] step=2440, skipped=5, lr=[5.702222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:38:34,303] [INFO] [timer.py:197:stop] 0/4880, RunningAvgSamplesPerSec=6.330893383464845, CurrSamplesPerSec=5.717082028950172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:38:45,586] [INFO] [timer.py:197:stop] 0/4882, RunningAvgSamplesPerSec=6.330902580256411, CurrSamplesPerSec=5.728562523461276, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:38:56,843] [INFO] [timer.py:197:stop] 0/4884, RunningAvgSamplesPerSec=6.330914329730515, CurrSamplesPerSec=5.71846191647385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:39:08,130] [INFO] [timer.py:197:stop] 0/4886, RunningAvgSamplesPerSec=6.330922027749416, CurrSamplesPerSec=5.739567116761201, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:39:19,403] [INFO] [timer.py:197:stop] 0/4888, RunningAvgSamplesPerSec=6.330933121823192, CurrSamplesPerSec=5.723764781098753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:39:31,079] [INFO] [timer.py:197:stop] 0/4890, RunningAvgSamplesPerSec=6.330841332485585, CurrSamplesPerSec=5.349498429200928, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:39:42,308] [INFO] [timer.py:197:stop] 0/4892, RunningAvgSamplesPerSec=6.330855947690373, CurrSamplesPerSec=5.728922696735184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:39:53,592] [INFO] [timer.py:197:stop] 0/4894, RunningAvgSamplesPerSec=6.33086065662833, CurrSamplesPerSec=5.6968968661698085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:40:04,953] [INFO] [timer.py:197:stop] 0/4896, RunningAvgSamplesPerSec=6.330849295325178, CurrSamplesPerSec=5.6243742767222225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:40:16,256] [INFO] [timer.py:197:stop] 0/4898, RunningAvgSamplesPerSec=6.33085324011296, CurrSamplesPerSec=5.696648542370528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:40:27,669] [INFO] [logging.py:68:log_dist] [Rank 0] step=2450, skipped=5, lr=[5.68e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:40:27,671] [INFO] [timer.py:197:stop] 0/4900, RunningAvgSamplesPerSec=6.330869663344087, CurrSamplesPerSec=5.736259242619751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.002, 'learning_rate': 5.68e-06, 'epoch': 10.38} +[2022-12-17 04:40:39,234] [INFO] [timer.py:197:stop] 0/4902, RunningAvgSamplesPerSec=6.330875795922531, CurrSamplesPerSec=5.697687681112232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:40:50,517] [INFO] [timer.py:197:stop] 0/4904, RunningAvgSamplesPerSec=6.330884282678793, CurrSamplesPerSec=5.71153437551238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:41:01,993] [INFO] [timer.py:197:stop] 0/4906, RunningAvgSamplesPerSec=6.330895576355606, CurrSamplesPerSec=5.721800514246041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:41:13,570] [INFO] [timer.py:197:stop] 0/4908, RunningAvgSamplesPerSec=6.330898229565119, CurrSamplesPerSec=5.7448197088231945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:41:24,882] [INFO] [timer.py:197:stop] 0/4910, RunningAvgSamplesPerSec=6.330898886215009, CurrSamplesPerSec=5.6970586388875954, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:41:36,127] [INFO] [timer.py:197:stop] 0/4912, RunningAvgSamplesPerSec=6.330912386969782, CurrSamplesPerSec=5.699116541305495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:41:47,493] [INFO] [timer.py:197:stop] 0/4914, RunningAvgSamplesPerSec=6.330903721488429, CurrSamplesPerSec=5.698654369615493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:41:58,764] [INFO] [timer.py:197:stop] 0/4916, RunningAvgSamplesPerSec=6.330914261240133, CurrSamplesPerSec=5.732406952943924, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:42:09,996] [INFO] [timer.py:197:stop] 0/4918, RunningAvgSamplesPerSec=6.330934678578837, CurrSamplesPerSec=5.731016657378124, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:42:21,272] [INFO] [logging.py:68:log_dist] [Rank 0] step=2460, skipped=5, lr=[5.657777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:42:21,274] [INFO] [timer.py:197:stop] 0/4920, RunningAvgSamplesPerSec=6.330939690491742, CurrSamplesPerSec=5.706582638769841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:42:32,531] [INFO] [timer.py:197:stop] 0/4922, RunningAvgSamplesPerSec=6.330953956718717, CurrSamplesPerSec=5.741506024833396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:42:43,762] [INFO] [timer.py:197:stop] 0/4924, RunningAvgSamplesPerSec=6.330971782272438, CurrSamplesPerSec=5.735810391804533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:42:55,146] [INFO] [timer.py:197:stop] 0/4926, RunningAvgSamplesPerSec=6.330967983484556, CurrSamplesPerSec=5.715938432909474, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:43:06,393] [INFO] [timer.py:197:stop] 0/4928, RunningAvgSamplesPerSec=6.330985542332555, CurrSamplesPerSec=5.745078397976903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:43:17,660] [INFO] [timer.py:197:stop] 0/4930, RunningAvgSamplesPerSec=6.330994799172411, CurrSamplesPerSec=5.723397202046335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:43:28,909] [INFO] [timer.py:197:stop] 0/4932, RunningAvgSamplesPerSec=6.3310084101233475, CurrSamplesPerSec=5.7396984310883274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:43:40,183] [INFO] [timer.py:197:stop] 0/4934, RunningAvgSamplesPerSec=6.331015231943894, CurrSamplesPerSec=5.7254964057845985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:43:51,455] [INFO] [timer.py:197:stop] 0/4936, RunningAvgSamplesPerSec=6.3310223319318535, CurrSamplesPerSec=5.731285852101456, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:44:02,808] [INFO] [timer.py:197:stop] 0/4938, RunningAvgSamplesPerSec=6.331024615497289, CurrSamplesPerSec=5.694806496347404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:44:14,054] [INFO] [logging.py:68:log_dist] [Rank 0] step=2470, skipped=5, lr=[5.635555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:44:14,056] [INFO] [timer.py:197:stop] 0/4940, RunningAvgSamplesPerSec=6.331037883835673, CurrSamplesPerSec=5.726190863716782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:44:25,339] [INFO] [timer.py:197:stop] 0/4942, RunningAvgSamplesPerSec=6.331046275843268, CurrSamplesPerSec=5.72533814277841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:44:36,837] [INFO] [timer.py:197:stop] 0/4944, RunningAvgSamplesPerSec=6.330999783404063, CurrSamplesPerSec=5.714860750176948, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:44:48,130] [INFO] [timer.py:197:stop] 0/4946, RunningAvgSamplesPerSec=6.331006276714516, CurrSamplesPerSec=5.693992083341906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:44:59,676] [INFO] [timer.py:197:stop] 0/4948, RunningAvgSamplesPerSec=6.330950637601543, CurrSamplesPerSec=5.472202700799134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:45:11,183] [INFO] [timer.py:197:stop] 0/4950, RunningAvgSamplesPerSec=6.330955165485506, CurrSamplesPerSec=5.697722027285682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0024, 'learning_rate': 5.624444444444445e-06, 'epoch': 10.49} +[2022-12-17 04:45:22,615] [INFO] [timer.py:197:stop] 0/4952, RunningAvgSamplesPerSec=6.330968195653805, CurrSamplesPerSec=5.728223420030052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:45:33,954] [INFO] [timer.py:197:stop] 0/4954, RunningAvgSamplesPerSec=6.330962540471415, CurrSamplesPerSec=5.636947553409113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:45:45,419] [INFO] [timer.py:197:stop] 0/4956, RunningAvgSamplesPerSec=6.33096635642239, CurrSamplesPerSec=5.693265324059259, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:45:56,717] [INFO] [timer.py:197:stop] 0/4958, RunningAvgSamplesPerSec=6.330967716644158, CurrSamplesPerSec=5.69654095017297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:46:07,973] [INFO] [logging.py:68:log_dist] [Rank 0] step=2480, skipped=5, lr=[5.613333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:46:07,974] [INFO] [timer.py:197:stop] 0/4960, RunningAvgSamplesPerSec=6.330982785160217, CurrSamplesPerSec=5.729682559028319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:46:19,241] [INFO] [timer.py:197:stop] 0/4962, RunningAvgSamplesPerSec=6.330991543986501, CurrSamplesPerSec=5.71776031937525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:46:30,562] [INFO] [timer.py:197:stop] 0/4964, RunningAvgSamplesPerSec=6.330986218687438, CurrSamplesPerSec=5.683666064780153, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:46:41,955] [INFO] [timer.py:197:stop] 0/4966, RunningAvgSamplesPerSec=6.330967592057502, CurrSamplesPerSec=5.610417109930005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:46:53,258] [INFO] [timer.py:197:stop] 0/4968, RunningAvgSamplesPerSec=6.330971016468183, CurrSamplesPerSec=5.715656560645305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:47:04,541] [INFO] [timer.py:197:stop] 0/4970, RunningAvgSamplesPerSec=6.3309794255496845, CurrSamplesPerSec=5.734612982910162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:47:16,033] [INFO] [timer.py:197:stop] 0/4972, RunningAvgSamplesPerSec=6.330936383650636, CurrSamplesPerSec=5.529107192673739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:47:27,313] [INFO] [timer.py:197:stop] 0/4974, RunningAvgSamplesPerSec=6.330946328971878, CurrSamplesPerSec=5.729163081966281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:47:38,607] [INFO] [timer.py:197:stop] 0/4976, RunningAvgSamplesPerSec=6.330953184900717, CurrSamplesPerSec=5.726244121402355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:47:50,207] [INFO] [timer.py:197:stop] 0/4978, RunningAvgSamplesPerSec=6.330953538099298, CurrSamplesPerSec=5.68242609069436, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:48:01,496] [INFO] [logging.py:68:log_dist] [Rank 0] step=2490, skipped=5, lr=[5.591111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:48:01,497] [INFO] [timer.py:197:stop] 0/4980, RunningAvgSamplesPerSec=6.33096013014356, CurrSamplesPerSec=5.694920305619834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:48:12,985] [INFO] [timer.py:197:stop] 0/4982, RunningAvgSamplesPerSec=6.330969576658594, CurrSamplesPerSec=5.704752349116692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:48:24,480] [INFO] [timer.py:197:stop] 0/4984, RunningAvgSamplesPerSec=6.330971504946974, CurrSamplesPerSec=5.693244555334464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:48:35,802] [INFO] [timer.py:197:stop] 0/4986, RunningAvgSamplesPerSec=6.33097084853042, CurrSamplesPerSec=5.688880208346578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:48:47,248] [INFO] [timer.py:197:stop] 0/4988, RunningAvgSamplesPerSec=6.3309845144832435, CurrSamplesPerSec=5.742247609891225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:48:58,752] [INFO] [timer.py:197:stop] 0/4990, RunningAvgSamplesPerSec=6.330976160204492, CurrSamplesPerSec=5.674807318807295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:49:10,042] [INFO] [timer.py:197:stop] 0/4992, RunningAvgSamplesPerSec=6.3309835858007135, CurrSamplesPerSec=5.713671584841052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:49:21,342] [INFO] [timer.py:197:stop] 0/4994, RunningAvgSamplesPerSec=6.330989254123995, CurrSamplesPerSec=5.733567190966732, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:49:32,658] [INFO] [timer.py:197:stop] 0/4996, RunningAvgSamplesPerSec=6.330985781418266, CurrSamplesPerSec=5.712304461873893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:49:43,973] [INFO] [timer.py:197:stop] 0/4998, RunningAvgSamplesPerSec=6.330986864519867, CurrSamplesPerSec=5.688001440544016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:49:55,252] [INFO] [logging.py:68:log_dist] [Rank 0] step=2500, skipped=5, lr=[5.56888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:49:55,254] [INFO] [timer.py:197:stop] 0/5000, RunningAvgSamplesPerSec=6.3309958963633886, CurrSamplesPerSec=5.718180282509813, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0025, 'learning_rate': 5.56888888888889e-06, 'epoch': 10.59} +[2022-12-17 04:50:06,731] [INFO] [timer.py:197:stop] 0/5002, RunningAvgSamplesPerSec=6.3309565040950755, CurrSamplesPerSec=5.710061386113425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:50:18,001] [INFO] [timer.py:197:stop] 0/5004, RunningAvgSamplesPerSec=6.330958386668482, CurrSamplesPerSec=5.690579205791651, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:50:29,295] [INFO] [timer.py:197:stop] 0/5006, RunningAvgSamplesPerSec=6.33096474195664, CurrSamplesPerSec=5.70624128047807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:50:40,632] [INFO] [timer.py:197:stop] 0/5008, RunningAvgSamplesPerSec=6.330959864889557, CurrSamplesPerSec=5.72188784057362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:50:51,923] [INFO] [timer.py:197:stop] 0/5010, RunningAvgSamplesPerSec=6.330966035087153, CurrSamplesPerSec=5.7113448027691005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:51:03,174] [INFO] [timer.py:197:stop] 0/5012, RunningAvgSamplesPerSec=6.330974127924187, CurrSamplesPerSec=5.714591636296829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:51:14,882] [INFO] [timer.py:197:stop] 0/5014, RunningAvgSamplesPerSec=6.330981396072004, CurrSamplesPerSec=5.719529258836262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:51:26,372] [INFO] [timer.py:197:stop] 0/5016, RunningAvgSamplesPerSec=6.330988349564209, CurrSamplesPerSec=5.709294332972175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:51:37,680] [INFO] [timer.py:197:stop] 0/5018, RunningAvgSamplesPerSec=6.330990138475348, CurrSamplesPerSec=5.697592868472862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:51:49,248] [INFO] [logging.py:68:log_dist] [Rank 0] step=2510, skipped=5, lr=[5.546666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:51:49,250] [INFO] [timer.py:197:stop] 0/5020, RunningAvgSamplesPerSec=6.330983495949797, CurrSamplesPerSec=5.667921172586138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:52:00,554] [INFO] [timer.py:197:stop] 0/5022, RunningAvgSamplesPerSec=6.330982696273461, CurrSamplesPerSec=5.700785823007439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:52:12,061] [INFO] [timer.py:197:stop] 0/5024, RunningAvgSamplesPerSec=6.330933809736621, CurrSamplesPerSec=5.477729075145169, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:52:23,555] [INFO] [timer.py:197:stop] 0/5026, RunningAvgSamplesPerSec=6.330939921329972, CurrSamplesPerSec=5.714190932308468, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:52:34,858] [INFO] [timer.py:197:stop] 0/5028, RunningAvgSamplesPerSec=6.330943473294494, CurrSamplesPerSec=5.708029315662384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:52:46,444] [INFO] [timer.py:197:stop] 0/5030, RunningAvgSamplesPerSec=6.330879968150159, CurrSamplesPerSec=5.430234106849332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:52:57,897] [INFO] [timer.py:197:stop] 0/5032, RunningAvgSamplesPerSec=6.330886515803439, CurrSamplesPerSec=5.708739938717838, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:53:09,136] [INFO] [timer.py:197:stop] 0/5034, RunningAvgSamplesPerSec=6.330906438784912, CurrSamplesPerSec=5.731909991688547, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:53:20,483] [INFO] [timer.py:197:stop] 0/5036, RunningAvgSamplesPerSec=6.33089898868953, CurrSamplesPerSec=5.645936337730132, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:53:32,121] [INFO] [timer.py:197:stop] 0/5038, RunningAvgSamplesPerSec=6.3309053412966305, CurrSamplesPerSec=5.706116101744382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:53:43,376] [INFO] [logging.py:68:log_dist] [Rank 0] step=2520, skipped=5, lr=[5.524444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:53:43,378] [INFO] [timer.py:197:stop] 0/5040, RunningAvgSamplesPerSec=6.330912990471432, CurrSamplesPerSec=5.70689030866712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:53:54,672] [INFO] [timer.py:197:stop] 0/5042, RunningAvgSamplesPerSec=6.3309159228723315, CurrSamplesPerSec=5.6894517339217705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:54:05,967] [INFO] [timer.py:197:stop] 0/5044, RunningAvgSamplesPerSec=6.3309220000445, CurrSamplesPerSec=5.7000314267222265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:54:17,231] [INFO] [timer.py:197:stop] 0/5046, RunningAvgSamplesPerSec=6.330935750681338, CurrSamplesPerSec=5.738988914310895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:54:28,522] [INFO] [timer.py:197:stop] 0/5048, RunningAvgSamplesPerSec=6.330939183099069, CurrSamplesPerSec=5.689545311073165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:54:39,793] [INFO] [timer.py:197:stop] 0/5050, RunningAvgSamplesPerSec=6.330947485759522, CurrSamplesPerSec=5.712155679007506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0021, 'learning_rate': 5.513333333333334e-06, 'epoch': 10.7} +[2022-12-17 04:54:51,133] [INFO] [timer.py:197:stop] 0/5052, RunningAvgSamplesPerSec=6.33095712793302, CurrSamplesPerSec=5.7344730809313536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:55:02,649] [INFO] [timer.py:197:stop] 0/5054, RunningAvgSamplesPerSec=6.330958595704715, CurrSamplesPerSec=5.691896602840322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:55:13,982] [INFO] [timer.py:197:stop] 0/5056, RunningAvgSamplesPerSec=6.330955353948697, CurrSamplesPerSec=5.671333757852513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:55:25,520] [INFO] [timer.py:197:stop] 0/5058, RunningAvgSamplesPerSec=6.330959551487751, CurrSamplesPerSec=5.705458275887765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:55:36,931] [INFO] [logging.py:68:log_dist] [Rank 0] step=2530, skipped=5, lr=[5.5022222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:55:36,933] [INFO] [timer.py:197:stop] 0/5060, RunningAvgSamplesPerSec=6.330948864322162, CurrSamplesPerSec=5.692745185578915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:55:48,218] [INFO] [timer.py:197:stop] 0/5062, RunningAvgSamplesPerSec=6.330952978062427, CurrSamplesPerSec=5.707607201453538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:55:59,759] [INFO] [timer.py:197:stop] 0/5064, RunningAvgSamplesPerSec=6.330951656585579, CurrSamplesPerSec=5.691243740564786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:56:11,357] [INFO] [timer.py:197:stop] 0/5066, RunningAvgSamplesPerSec=6.3309381307146335, CurrSamplesPerSec=5.699524088901511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:56:22,657] [INFO] [timer.py:197:stop] 0/5068, RunningAvgSamplesPerSec=6.33094166677426, CurrSamplesPerSec=5.704085140967916, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:56:33,962] [INFO] [timer.py:197:stop] 0/5070, RunningAvgSamplesPerSec=6.330944843263456, CurrSamplesPerSec=5.68741405914248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:56:45,316] [INFO] [timer.py:197:stop] 0/5072, RunningAvgSamplesPerSec=6.330935756879842, CurrSamplesPerSec=5.718039719534393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:56:56,621] [INFO] [timer.py:197:stop] 0/5074, RunningAvgSamplesPerSec=6.330939117650655, CurrSamplesPerSec=5.707029838678644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:57:07,918] [INFO] [timer.py:197:stop] 0/5076, RunningAvgSamplesPerSec=6.330944165593699, CurrSamplesPerSec=5.705340164667865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:57:19,572] [INFO] [timer.py:197:stop] 0/5078, RunningAvgSamplesPerSec=6.3309475667119015, CurrSamplesPerSec=5.720259082428829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:57:30,872] [INFO] [logging.py:68:log_dist] [Rank 0] step=2540, skipped=5, lr=[5.480000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:57:30,874] [INFO] [timer.py:197:stop] 0/5080, RunningAvgSamplesPerSec=6.330950895937812, CurrSamplesPerSec=5.7041610183311, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:57:42,225] [INFO] [timer.py:197:stop] 0/5082, RunningAvgSamplesPerSec=6.3309426889380465, CurrSamplesPerSec=5.622238335501086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:57:53,760] [INFO] [timer.py:197:stop] 0/5084, RunningAvgSamplesPerSec=6.330952854774201, CurrSamplesPerSec=5.729616273990773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:58:05,143] [INFO] [timer.py:197:stop] 0/5086, RunningAvgSamplesPerSec=6.3309542655873265, CurrSamplesPerSec=5.703234143735746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:58:16,445] [INFO] [timer.py:197:stop] 0/5088, RunningAvgSamplesPerSec=6.330960905784509, CurrSamplesPerSec=5.7160389692080775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:58:28,076] [INFO] [timer.py:197:stop] 0/5090, RunningAvgSamplesPerSec=6.330968824088808, CurrSamplesPerSec=5.706404554792997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:58:39,364] [INFO] [timer.py:197:stop] 0/5092, RunningAvgSamplesPerSec=6.330972199325294, CurrSamplesPerSec=5.693679764246615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:58:50,726] [INFO] [timer.py:197:stop] 0/5094, RunningAvgSamplesPerSec=6.330961521446245, CurrSamplesPerSec=5.635776153192244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:59:02,022] [INFO] [timer.py:197:stop] 0/5096, RunningAvgSamplesPerSec=6.33096897362408, CurrSamplesPerSec=5.719917061077151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:59:13,270] [INFO] [timer.py:197:stop] 0/5098, RunningAvgSamplesPerSec=6.330985743984386, CurrSamplesPerSec=5.742667983797375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:59:24,542] [INFO] [logging.py:68:log_dist] [Rank 0] step=2550, skipped=5, lr=[5.4577777777777785e-06], mom=[[0.9, 0.999]] +[2022-12-17 04:59:24,543] [INFO] [timer.py:197:stop] 0/5100, RunningAvgSamplesPerSec=6.330996104084737, CurrSamplesPerSec=5.716025093527925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0018, 'learning_rate': 5.4577777777777785e-06, 'epoch': 10.81} +[2022-12-17 04:59:35,910] [INFO] [timer.py:197:stop] 0/5102, RunningAvgSamplesPerSec=6.330983921264206, CurrSamplesPerSec=5.626090613198088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:59:47,213] [INFO] [timer.py:197:stop] 0/5104, RunningAvgSamplesPerSec=6.330988431641169, CurrSamplesPerSec=5.70301798095724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 04:59:58,451] [INFO] [timer.py:197:stop] 0/5106, RunningAvgSamplesPerSec=6.331007527523088, CurrSamplesPerSec=5.757828958869349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:00:09,982] [INFO] [timer.py:197:stop] 0/5108, RunningAvgSamplesPerSec=6.331019128346327, CurrSamplesPerSec=5.731908033391001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:00:21,267] [INFO] [timer.py:197:stop] 0/5110, RunningAvgSamplesPerSec=6.331032777766586, CurrSamplesPerSec=5.727329034819482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:00:32,810] [INFO] [timer.py:197:stop] 0/5112, RunningAvgSamplesPerSec=6.330977003613128, CurrSamplesPerSec=5.451910508189052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:00:44,368] [INFO] [timer.py:197:stop] 0/5114, RunningAvgSamplesPerSec=6.330978114522191, CurrSamplesPerSec=5.696982224919324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:00:55,670] [INFO] [timer.py:197:stop] 0/5116, RunningAvgSamplesPerSec=6.330987497891552, CurrSamplesPerSec=5.707285135595816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:01:07,354] [INFO] [timer.py:197:stop] 0/5118, RunningAvgSamplesPerSec=6.330899081644487, CurrSamplesPerSec=5.3293482591493655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:01:18,753] [INFO] [logging.py:68:log_dist] [Rank 0] step=2560, skipped=5, lr=[5.435555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:01:18,755] [INFO] [timer.py:197:stop] 0/5120, RunningAvgSamplesPerSec=6.330901902893264, CurrSamplesPerSec=5.715834005486816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:01:30,224] [INFO] [timer.py:197:stop] 0/5122, RunningAvgSamplesPerSec=6.330903788047218, CurrSamplesPerSec=5.690690433368548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:01:41,549] [INFO] [timer.py:197:stop] 0/5124, RunningAvgSamplesPerSec=6.330899001074299, CurrSamplesPerSec=5.678515045294561, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:01:53,137] [INFO] [timer.py:197:stop] 0/5126, RunningAvgSamplesPerSec=6.330897736958658, CurrSamplesPerSec=5.681188581320298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:02:04,450] [INFO] [timer.py:197:stop] 0/5128, RunningAvgSamplesPerSec=6.330906470721249, CurrSamplesPerSec=5.7209881168777335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:02:16,010] [INFO] [timer.py:197:stop] 0/5130, RunningAvgSamplesPerSec=6.330847665324243, CurrSamplesPerSec=5.448622762386459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:02:27,308] [INFO] [timer.py:197:stop] 0/5132, RunningAvgSamplesPerSec=6.330851968143579, CurrSamplesPerSec=5.695187811565611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:02:38,633] [INFO] [timer.py:197:stop] 0/5134, RunningAvgSamplesPerSec=6.3308504728612265, CurrSamplesPerSec=5.6930532969919625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:02:50,042] [INFO] [timer.py:197:stop] 0/5136, RunningAvgSamplesPerSec=6.330828568356297, CurrSamplesPerSec=5.590546942353913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:03:01,351] [INFO] [timer.py:197:stop] 0/5138, RunningAvgSamplesPerSec=6.330831029018583, CurrSamplesPerSec=5.692884024759842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:03:12,670] [INFO] [logging.py:68:log_dist] [Rank 0] step=2570, skipped=5, lr=[5.413333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:03:12,671] [INFO] [timer.py:197:stop] 0/5140, RunningAvgSamplesPerSec=6.330831830621784, CurrSamplesPerSec=5.692991961810583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:03:24,122] [INFO] [timer.py:197:stop] 0/5142, RunningAvgSamplesPerSec=6.3307995165787485, CurrSamplesPerSec=5.557602468839612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:03:35,475] [INFO] [timer.py:197:stop] 0/5144, RunningAvgSamplesPerSec=6.330791148922355, CurrSamplesPerSec=5.66601202336115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:03:46,865] [INFO] [timer.py:197:stop] 0/5146, RunningAvgSamplesPerSec=6.330793328574028, CurrSamplesPerSec=5.694739566054518, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:03:58,525] [INFO] [timer.py:197:stop] 0/5148, RunningAvgSamplesPerSec=6.3307985999441545, CurrSamplesPerSec=5.71201687060952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:04:09,847] [INFO] [timer.py:197:stop] 0/5150, RunningAvgSamplesPerSec=6.330798264943373, CurrSamplesPerSec=5.688799432348064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0022, 'learning_rate': 5.402222222222223e-06, 'epoch': 10.91} +[2022-12-17 05:04:21,267] [INFO] [timer.py:197:stop] 0/5152, RunningAvgSamplesPerSec=6.3308049922936895, CurrSamplesPerSec=5.739957886590591, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:04:32,804] [INFO] [timer.py:197:stop] 0/5154, RunningAvgSamplesPerSec=6.330808207297696, CurrSamplesPerSec=5.714203582698553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:04:44,095] [INFO] [timer.py:197:stop] 0/5156, RunningAvgSamplesPerSec=6.330814103852655, CurrSamplesPerSec=5.70229884876786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:04:55,418] [INFO] [timer.py:197:stop] 0/5158, RunningAvgSamplesPerSec=6.330826468927088, CurrSamplesPerSec=5.736627004540996, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:05:06,939] [INFO] [logging.py:68:log_dist] [Rank 0] step=2580, skipped=5, lr=[5.391111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:05:06,941] [INFO] [timer.py:197:stop] 0/5160, RunningAvgSamplesPerSec=6.3308266043403725, CurrSamplesPerSec=5.698408553854269, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:05:18,251] [INFO] [timer.py:197:stop] 0/5162, RunningAvgSamplesPerSec=6.330825475976958, CurrSamplesPerSec=5.6952141527152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:05:29,525] [INFO] [timer.py:197:stop] 0/5164, RunningAvgSamplesPerSec=6.330832549112537, CurrSamplesPerSec=5.7298214935295935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:05:41,133] [INFO] [timer.py:197:stop] 0/5166, RunningAvgSamplesPerSec=6.330825425802116, CurrSamplesPerSec=5.6973268294471975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:05:52,434] [INFO] [timer.py:197:stop] 0/5168, RunningAvgSamplesPerSec=6.3308264370216225, CurrSamplesPerSec=5.683646328760508, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:06:03,725] [INFO] [timer.py:197:stop] 0/5170, RunningAvgSamplesPerSec=6.330833681391844, CurrSamplesPerSec=5.717644133973545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:06:15,065] [INFO] [timer.py:197:stop] 0/5172, RunningAvgSamplesPerSec=6.330828764624116, CurrSamplesPerSec=5.7181651783578396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:06:26,405] [INFO] [timer.py:197:stop] 0/5174, RunningAvgSamplesPerSec=6.330823795604273, CurrSamplesPerSec=5.675452815519628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:06:37,674] [INFO] [timer.py:197:stop] 0/5176, RunningAvgSamplesPerSec=6.33083273222414, CurrSamplesPerSec=5.721885157322745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:06:49,031] [INFO] [timer.py:197:stop] 0/5178, RunningAvgSamplesPerSec=6.330822949193593, CurrSamplesPerSec=5.683569311458349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:07:00,323] [INFO] [logging.py:68:log_dist] [Rank 0] step=2590, skipped=5, lr=[5.368888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:07:00,326] [INFO] [timer.py:197:stop] 0/5180, RunningAvgSamplesPerSec=6.3308245978339865, CurrSamplesPerSec=5.687645430029632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:07:11,607] [INFO] [timer.py:197:stop] 0/5182, RunningAvgSamplesPerSec=6.330829600063001, CurrSamplesPerSec=5.702640220108133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:07:22,922] [INFO] [timer.py:197:stop] 0/5184, RunningAvgSamplesPerSec=6.3308299773962595, CurrSamplesPerSec=5.6947854747502085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:07:34,227] [INFO] [timer.py:197:stop] 0/5186, RunningAvgSamplesPerSec=6.330840511623829, CurrSamplesPerSec=5.7253488887550406, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:07:45,694] [INFO] [timer.py:197:stop] 0/5188, RunningAvgSamplesPerSec=6.330802234493931, CurrSamplesPerSec=5.545931811238111, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:07:57,240] [INFO] [timer.py:197:stop] 0/5190, RunningAvgSamplesPerSec=6.3308106513252556, CurrSamplesPerSec=5.714275107111477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:08:05,702] [INFO] [timer.py:197:stop] 0/5192, RunningAvgSamplesPerSec=6.331415568524979, CurrSamplesPerSec=10.224405036507427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:08:16,994] [INFO] [timer.py:197:stop] 0/5194, RunningAvgSamplesPerSec=6.33142151258471, CurrSamplesPerSec=5.708924967667167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:08:28,320] [INFO] [timer.py:197:stop] 0/5196, RunningAvgSamplesPerSec=6.331419035586852, CurrSamplesPerSec=5.720213493471337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:08:39,593] [INFO] [timer.py:197:stop] 0/5198, RunningAvgSamplesPerSec=6.331429428749043, CurrSamplesPerSec=5.717296823800851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:08:50,859] [INFO] [logging.py:68:log_dist] [Rank 0] step=2600, skipped=5, lr=[5.346666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:08:50,861] [INFO] [timer.py:197:stop] 0/5200, RunningAvgSamplesPerSec=6.33144061780039, CurrSamplesPerSec=5.7277724037809055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0016, 'learning_rate': 5.346666666666667e-06, 'epoch': 11.02} +[2022-12-17 05:09:02,114] [INFO] [timer.py:197:stop] 0/5202, RunningAvgSamplesPerSec=6.331448483641698, CurrSamplesPerSec=5.719249225680997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:09:13,434] [INFO] [timer.py:197:stop] 0/5204, RunningAvgSamplesPerSec=6.331448183327159, CurrSamplesPerSec=5.699337248682659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:09:24,734] [INFO] [timer.py:197:stop] 0/5206, RunningAvgSamplesPerSec=6.331452657655695, CurrSamplesPerSec=5.700206208089163, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:09:36,014] [INFO] [timer.py:197:stop] 0/5208, RunningAvgSamplesPerSec=6.331461622033795, CurrSamplesPerSec=5.72109761000533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:09:47,276] [INFO] [timer.py:197:stop] 0/5210, RunningAvgSamplesPerSec=6.3314746264100314, CurrSamplesPerSec=5.716316496958784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:09:58,571] [INFO] [timer.py:197:stop] 0/5212, RunningAvgSamplesPerSec=6.331480978826958, CurrSamplesPerSec=5.725256083923386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:10:09,843] [INFO] [timer.py:197:stop] 0/5214, RunningAvgSamplesPerSec=6.331491432789969, CurrSamplesPerSec=5.725326664166113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:10:21,125] [INFO] [timer.py:197:stop] 0/5216, RunningAvgSamplesPerSec=6.331499899781304, CurrSamplesPerSec=5.7110269321049865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:10:32,392] [INFO] [timer.py:197:stop] 0/5218, RunningAvgSamplesPerSec=6.331511769858337, CurrSamplesPerSec=5.712665997638112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:10:43,678] [INFO] [logging.py:68:log_dist] [Rank 0] step=2610, skipped=5, lr=[5.324444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:10:43,680] [INFO] [timer.py:197:stop] 0/5220, RunningAvgSamplesPerSec=6.331518616398237, CurrSamplesPerSec=5.7057262876552635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:10:54,977] [INFO] [timer.py:197:stop] 0/5222, RunningAvgSamplesPerSec=6.331523063457298, CurrSamplesPerSec=5.700985107994975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:11:06,265] [INFO] [timer.py:197:stop] 0/5224, RunningAvgSamplesPerSec=6.331530146740582, CurrSamplesPerSec=5.712085180028847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:11:17,572] [INFO] [timer.py:197:stop] 0/5226, RunningAvgSamplesPerSec=6.331532193459119, CurrSamplesPerSec=5.693007416219925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:11:28,880] [INFO] [timer.py:197:stop] 0/5228, RunningAvgSamplesPerSec=6.331534394982881, CurrSamplesPerSec=5.702848600275808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:11:40,160] [INFO] [timer.py:197:stop] 0/5230, RunningAvgSamplesPerSec=6.331543863225301, CurrSamplesPerSec=5.727150386796147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:11:51,450] [INFO] [timer.py:197:stop] 0/5232, RunningAvgSamplesPerSec=6.3315500633562944, CurrSamplesPerSec=5.715176370807533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:12:02,731] [INFO] [timer.py:197:stop] 0/5234, RunningAvgSamplesPerSec=6.331554300362814, CurrSamplesPerSec=5.718985546768476, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:12:14,015] [INFO] [timer.py:197:stop] 0/5236, RunningAvgSamplesPerSec=6.3315630531989795, CurrSamplesPerSec=5.7190143016978965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:12:25,279] [INFO] [timer.py:197:stop] 0/5238, RunningAvgSamplesPerSec=6.331575216395316, CurrSamplesPerSec=5.733168474540448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:12:36,556] [INFO] [logging.py:68:log_dist] [Rank 0] step=2620, skipped=5, lr=[5.302222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:12:36,557] [INFO] [timer.py:197:stop] 0/5240, RunningAvgSamplesPerSec=6.331583901131349, CurrSamplesPerSec=5.7215373317447575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:12:47,841] [INFO] [timer.py:197:stop] 0/5242, RunningAvgSamplesPerSec=6.331591212199751, CurrSamplesPerSec=5.712049201910398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:12:59,137] [INFO] [timer.py:197:stop] 0/5244, RunningAvgSamplesPerSec=6.331595922910599, CurrSamplesPerSec=5.70338658227336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:13:10,454] [INFO] [timer.py:197:stop] 0/5246, RunningAvgSamplesPerSec=6.3315953067483335, CurrSamplesPerSec=5.677981985152896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:13:21,789] [INFO] [timer.py:197:stop] 0/5248, RunningAvgSamplesPerSec=6.331597185771025, CurrSamplesPerSec=5.705788625223031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:13:33,159] [INFO] [timer.py:197:stop] 0/5250, RunningAvgSamplesPerSec=6.331600861122208, CurrSamplesPerSec=5.699541273008275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0011, 'learning_rate': 5.2911111111111115e-06, 'epoch': 11.12} +[2022-12-17 05:13:44,480] [INFO] [timer.py:197:stop] 0/5252, RunningAvgSamplesPerSec=6.331599661255974, CurrSamplesPerSec=5.68312072716045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:13:55,971] [INFO] [timer.py:197:stop] 0/5254, RunningAvgSamplesPerSec=6.331603344469879, CurrSamplesPerSec=5.695243394093243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:14:07,295] [INFO] [timer.py:197:stop] 0/5256, RunningAvgSamplesPerSec=6.331597969028604, CurrSamplesPerSec=5.66516708705912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:14:18,576] [INFO] [timer.py:197:stop] 0/5258, RunningAvgSamplesPerSec=6.331607527232612, CurrSamplesPerSec=5.733064396149069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:14:29,890] [INFO] [logging.py:68:log_dist] [Rank 0] step=2630, skipped=5, lr=[5.28e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:14:29,893] [INFO] [timer.py:197:stop] 0/5260, RunningAvgSamplesPerSec=6.331607577304003, CurrSamplesPerSec=5.703569083004653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:14:41,225] [INFO] [timer.py:197:stop] 0/5262, RunningAvgSamplesPerSec=6.331622020508325, CurrSamplesPerSec=5.732581277009406, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:14:52,454] [INFO] [timer.py:197:stop] 0/5264, RunningAvgSamplesPerSec=6.331639133639575, CurrSamplesPerSec=5.738322751616373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:15:03,721] [INFO] [timer.py:197:stop] 0/5266, RunningAvgSamplesPerSec=6.33165171193482, CurrSamplesPerSec=5.7073268783289315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:15:15,023] [INFO] [timer.py:197:stop] 0/5268, RunningAvgSamplesPerSec=6.331655237231968, CurrSamplesPerSec=5.6875104613911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:15:26,312] [INFO] [timer.py:197:stop] 0/5270, RunningAvgSamplesPerSec=6.331662476795234, CurrSamplesPerSec=5.703980176569691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:15:37,605] [INFO] [timer.py:197:stop] 0/5272, RunningAvgSamplesPerSec=6.33166852698621, CurrSamplesPerSec=5.722610214448023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:15:48,870] [INFO] [timer.py:197:stop] 0/5274, RunningAvgSamplesPerSec=6.33168179634596, CurrSamplesPerSec=5.734479941114413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:16:00,126] [INFO] [timer.py:197:stop] 0/5276, RunningAvgSamplesPerSec=6.331695376311775, CurrSamplesPerSec=5.709654759127998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:16:11,408] [INFO] [timer.py:197:stop] 0/5278, RunningAvgSamplesPerSec=6.331700742788226, CurrSamplesPerSec=5.71670313316583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:16:22,679] [INFO] [logging.py:68:log_dist] [Rank 0] step=2640, skipped=5, lr=[5.257777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:16:22,681] [INFO] [timer.py:197:stop] 0/5280, RunningAvgSamplesPerSec=6.331707264117074, CurrSamplesPerSec=5.704465032869232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:16:34,130] [INFO] [timer.py:197:stop] 0/5282, RunningAvgSamplesPerSec=6.331710296952607, CurrSamplesPerSec=5.699808971206135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:16:45,425] [INFO] [timer.py:197:stop] 0/5284, RunningAvgSamplesPerSec=6.331715787203118, CurrSamplesPerSec=5.7361687804135455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:16:56,757] [INFO] [timer.py:197:stop] 0/5286, RunningAvgSamplesPerSec=6.331716077735873, CurrSamplesPerSec=5.7076763763304585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:17:08,057] [INFO] [timer.py:197:stop] 0/5288, RunningAvgSamplesPerSec=6.3317203145894725, CurrSamplesPerSec=5.714706237753005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:17:19,368] [INFO] [timer.py:197:stop] 0/5290, RunningAvgSamplesPerSec=6.331722296646484, CurrSamplesPerSec=5.699027246796303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:17:30,729] [INFO] [timer.py:197:stop] 0/5292, RunningAvgSamplesPerSec=6.331713096218976, CurrSamplesPerSec=5.672534851579894, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:17:42,048] [INFO] [timer.py:197:stop] 0/5294, RunningAvgSamplesPerSec=6.331713210379387, CurrSamplesPerSec=5.700520696294161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:17:53,394] [INFO] [timer.py:197:stop] 0/5296, RunningAvgSamplesPerSec=6.331707193585852, CurrSamplesPerSec=5.692714762593404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:18:04,741] [INFO] [timer.py:197:stop] 0/5298, RunningAvgSamplesPerSec=6.3317006432874114, CurrSamplesPerSec=5.682712394124242, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:18:16,059] [INFO] [logging.py:68:log_dist] [Rank 0] step=2650, skipped=5, lr=[5.235555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:18:16,061] [INFO] [timer.py:197:stop] 0/5300, RunningAvgSamplesPerSec=6.331700367364679, CurrSamplesPerSec=5.707803322659478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 5.235555555555556e-06, 'epoch': 11.23} +[2022-12-17 05:18:27,358] [INFO] [timer.py:197:stop] 0/5302, RunningAvgSamplesPerSec=6.331705986411841, CurrSamplesPerSec=5.715463793550649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:18:38,619] [INFO] [timer.py:197:stop] 0/5304, RunningAvgSamplesPerSec=6.331712476197681, CurrSamplesPerSec=5.721632455156021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:18:49,899] [INFO] [timer.py:197:stop] 0/5306, RunningAvgSamplesPerSec=6.331716192002087, CurrSamplesPerSec=5.710224879116211, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:19:01,197] [INFO] [timer.py:197:stop] 0/5308, RunningAvgSamplesPerSec=6.331718498716094, CurrSamplesPerSec=5.696512420788828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:19:12,489] [INFO] [timer.py:197:stop] 0/5310, RunningAvgSamplesPerSec=6.331716879305524, CurrSamplesPerSec=5.6943266626871045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:19:23,899] [INFO] [timer.py:197:stop] 0/5312, RunningAvgSamplesPerSec=6.331722781967347, CurrSamplesPerSec=5.709408964927718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:19:35,201] [INFO] [timer.py:197:stop] 0/5314, RunningAvgSamplesPerSec=6.331726558596379, CurrSamplesPerSec=5.717023340647629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:19:46,664] [INFO] [timer.py:197:stop] 0/5316, RunningAvgSamplesPerSec=6.33172464843963, CurrSamplesPerSec=5.688770015987062, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:19:57,951] [INFO] [timer.py:197:stop] 0/5318, RunningAvgSamplesPerSec=6.331731507360793, CurrSamplesPerSec=5.714302841549262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:20:09,198] [INFO] [logging.py:68:log_dist] [Rank 0] step=2660, skipped=5, lr=[5.213333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:20:09,200] [INFO] [timer.py:197:stop] 0/5320, RunningAvgSamplesPerSec=6.331747607613066, CurrSamplesPerSec=5.7378510105983604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:20:20,482] [INFO] [timer.py:197:stop] 0/5322, RunningAvgSamplesPerSec=6.33175641041769, CurrSamplesPerSec=5.727818113304493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:20:31,752] [INFO] [timer.py:197:stop] 0/5324, RunningAvgSamplesPerSec=6.331768182998982, CurrSamplesPerSec=5.730554190945366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:20:43,017] [INFO] [timer.py:197:stop] 0/5326, RunningAvgSamplesPerSec=6.331777148866358, CurrSamplesPerSec=5.71666222711335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:20:54,287] [INFO] [timer.py:197:stop] 0/5328, RunningAvgSamplesPerSec=6.331788167273932, CurrSamplesPerSec=5.723797489623256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:21:05,551] [INFO] [timer.py:197:stop] 0/5330, RunningAvgSamplesPerSec=6.331800719530591, CurrSamplesPerSec=5.728999480872663, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:21:16,893] [INFO] [timer.py:197:stop] 0/5332, RunningAvgSamplesPerSec=6.331788250487433, CurrSamplesPerSec=5.615644743162119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:21:28,160] [INFO] [timer.py:197:stop] 0/5334, RunningAvgSamplesPerSec=6.331799635445216, CurrSamplesPerSec=5.722074454648978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:21:39,415] [INFO] [timer.py:197:stop] 0/5336, RunningAvgSamplesPerSec=6.331810093831878, CurrSamplesPerSec=5.7304957150562466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:21:50,651] [INFO] [timer.py:197:stop] 0/5338, RunningAvgSamplesPerSec=6.331828149224771, CurrSamplesPerSec=5.744462205318481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:22:01,961] [INFO] [logging.py:68:log_dist] [Rank 0] step=2670, skipped=5, lr=[5.1911111111111116e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:22:01,963] [INFO] [timer.py:197:stop] 0/5340, RunningAvgSamplesPerSec=6.331828700824646, CurrSamplesPerSec=5.703318723094472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:22:13,215] [INFO] [timer.py:197:stop] 0/5342, RunningAvgSamplesPerSec=6.331843623981074, CurrSamplesPerSec=5.747786195521225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:22:24,454] [INFO] [timer.py:197:stop] 0/5344, RunningAvgSamplesPerSec=6.3318581877227516, CurrSamplesPerSec=5.73446230067685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:22:35,701] [INFO] [timer.py:197:stop] 0/5346, RunningAvgSamplesPerSec=6.331876988632842, CurrSamplesPerSec=5.737213559784555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:22:46,939] [INFO] [timer.py:197:stop] 0/5348, RunningAvgSamplesPerSec=6.331884598979178, CurrSamplesPerSec=5.723346681922929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:22:58,261] [INFO] [timer.py:197:stop] 0/5350, RunningAvgSamplesPerSec=6.331906735436208, CurrSamplesPerSec=5.763013587580053, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 5.18e-06, 'epoch': 11.33} +[2022-12-17 05:23:09,544] [INFO] [timer.py:197:stop] 0/5352, RunningAvgSamplesPerSec=6.331915096942259, CurrSamplesPerSec=5.732819765586832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:23:20,792] [INFO] [timer.py:197:stop] 0/5354, RunningAvgSamplesPerSec=6.331930152230148, CurrSamplesPerSec=5.7316884677646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:23:32,086] [INFO] [timer.py:197:stop] 0/5356, RunningAvgSamplesPerSec=6.3319429083498004, CurrSamplesPerSec=5.744001253755843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:23:43,371] [INFO] [timer.py:197:stop] 0/5358, RunningAvgSamplesPerSec=6.331957376154482, CurrSamplesPerSec=5.740309183024957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:23:54,647] [INFO] [logging.py:68:log_dist] [Rank 0] step=2680, skipped=5, lr=[5.168888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:23:54,648] [INFO] [timer.py:197:stop] 0/5360, RunningAvgSamplesPerSec=6.331973042164556, CurrSamplesPerSec=5.735553271893245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:24:05,930] [INFO] [timer.py:197:stop] 0/5362, RunningAvgSamplesPerSec=6.3319876983431085, CurrSamplesPerSec=5.732323467223491, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:24:17,235] [INFO] [timer.py:197:stop] 0/5364, RunningAvgSamplesPerSec=6.33199726527146, CurrSamplesPerSec=5.740857202934483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:24:28,512] [INFO] [timer.py:197:stop] 0/5366, RunningAvgSamplesPerSec=6.3320090287277955, CurrSamplesPerSec=5.725859613384882, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:24:39,937] [INFO] [timer.py:197:stop] 0/5368, RunningAvgSamplesPerSec=6.332025183926404, CurrSamplesPerSec=5.734573290236572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:24:51,212] [INFO] [timer.py:197:stop] 0/5370, RunningAvgSamplesPerSec=6.3320424111755464, CurrSamplesPerSec=5.725142768419131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:25:02,503] [INFO] [timer.py:197:stop] 0/5372, RunningAvgSamplesPerSec=6.332055348608328, CurrSamplesPerSec=5.726065296764906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:25:13,783] [INFO] [timer.py:197:stop] 0/5374, RunningAvgSamplesPerSec=6.332067066686548, CurrSamplesPerSec=5.70496088367438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:25:25,093] [INFO] [timer.py:197:stop] 0/5376, RunningAvgSamplesPerSec=6.332075611655805, CurrSamplesPerSec=5.725530843801487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:25:36,352] [INFO] [timer.py:197:stop] 0/5378, RunningAvgSamplesPerSec=6.332091996769401, CurrSamplesPerSec=5.747384268558034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:25:47,653] [INFO] [logging.py:68:log_dist] [Rank 0] step=2690, skipped=5, lr=[5.146666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:25:47,655] [INFO] [timer.py:197:stop] 0/5380, RunningAvgSamplesPerSec=6.332102105174284, CurrSamplesPerSec=5.742849321290859, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:25:58,906] [INFO] [timer.py:197:stop] 0/5382, RunningAvgSamplesPerSec=6.332119985619664, CurrSamplesPerSec=5.75074712347447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:26:10,209] [INFO] [timer.py:197:stop] 0/5384, RunningAvgSamplesPerSec=6.332129202412391, CurrSamplesPerSec=5.7269793252631995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:26:21,476] [INFO] [timer.py:197:stop] 0/5386, RunningAvgSamplesPerSec=6.332142626128737, CurrSamplesPerSec=5.722102508797743, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:26:32,768] [INFO] [timer.py:197:stop] 0/5388, RunningAvgSamplesPerSec=6.332153973047589, CurrSamplesPerSec=5.720645764517236, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:26:44,052] [INFO] [timer.py:197:stop] 0/5390, RunningAvgSamplesPerSec=6.332168222200867, CurrSamplesPerSec=5.758247911655947, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:26:55,350] [INFO] [timer.py:197:stop] 0/5392, RunningAvgSamplesPerSec=6.332179117202303, CurrSamplesPerSec=5.721149797514245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:27:06,619] [INFO] [timer.py:197:stop] 0/5394, RunningAvgSamplesPerSec=6.332194122074683, CurrSamplesPerSec=5.742245153181966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:27:17,923] [INFO] [timer.py:197:stop] 0/5396, RunningAvgSamplesPerSec=6.332204587974897, CurrSamplesPerSec=5.7320757174664925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:27:29,221] [INFO] [timer.py:197:stop] 0/5398, RunningAvgSamplesPerSec=6.332216127969198, CurrSamplesPerSec=5.722261568824939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:27:40,523] [INFO] [logging.py:68:log_dist] [Rank 0] step=2700, skipped=5, lr=[5.124444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:27:40,525] [INFO] [timer.py:197:stop] 0/5400, RunningAvgSamplesPerSec=6.332226431537867, CurrSamplesPerSec=5.727787558729274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0011, 'learning_rate': 5.124444444444445e-06, 'epoch': 11.44} +[2022-12-17 05:27:51,834] [INFO] [timer.py:197:stop] 0/5402, RunningAvgSamplesPerSec=6.3322330237590885, CurrSamplesPerSec=5.6972134076352186, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:28:03,079] [INFO] [timer.py:197:stop] 0/5404, RunningAvgSamplesPerSec=6.332250320317892, CurrSamplesPerSec=5.752729839098712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:28:14,392] [INFO] [timer.py:197:stop] 0/5406, RunningAvgSamplesPerSec=6.332256759997134, CurrSamplesPerSec=5.721441235920432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:28:25,656] [INFO] [timer.py:197:stop] 0/5408, RunningAvgSamplesPerSec=6.332269340859704, CurrSamplesPerSec=5.725855949325616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:28:36,913] [INFO] [timer.py:197:stop] 0/5410, RunningAvgSamplesPerSec=6.3322827126004, CurrSamplesPerSec=5.731097413139867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:28:48,192] [INFO] [timer.py:197:stop] 0/5412, RunningAvgSamplesPerSec=6.332287783761879, CurrSamplesPerSec=5.724056974223865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:28:59,486] [INFO] [timer.py:197:stop] 0/5414, RunningAvgSamplesPerSec=6.332293646201773, CurrSamplesPerSec=5.699748942659988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:29:10,933] [INFO] [timer.py:197:stop] 0/5416, RunningAvgSamplesPerSec=6.3323149190795425, CurrSamplesPerSec=5.744511623716563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:29:22,206] [INFO] [timer.py:197:stop] 0/5418, RunningAvgSamplesPerSec=6.332322147474618, CurrSamplesPerSec=5.681815807331453, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:29:33,427] [INFO] [logging.py:68:log_dist] [Rank 0] step=2710, skipped=5, lr=[5.102222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:29:33,429] [INFO] [timer.py:197:stop] 0/5420, RunningAvgSamplesPerSec=6.332336561497905, CurrSamplesPerSec=5.7329387724417895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:29:44,696] [INFO] [timer.py:197:stop] 0/5422, RunningAvgSamplesPerSec=6.332344831258414, CurrSamplesPerSec=5.70700921207328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:29:55,939] [INFO] [timer.py:197:stop] 0/5424, RunningAvgSamplesPerSec=6.332363229604519, CurrSamplesPerSec=5.73533685803425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:30:07,205] [INFO] [timer.py:197:stop] 0/5426, RunningAvgSamplesPerSec=6.332375270831397, CurrSamplesPerSec=5.724526938018898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:30:18,440] [INFO] [timer.py:197:stop] 0/5428, RunningAvgSamplesPerSec=6.332391038467731, CurrSamplesPerSec=5.740212455607973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:30:29,715] [INFO] [timer.py:197:stop] 0/5430, RunningAvgSamplesPerSec=6.3324006160843105, CurrSamplesPerSec=5.7166308175061005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:30:40,990] [INFO] [timer.py:197:stop] 0/5432, RunningAvgSamplesPerSec=6.332410610941344, CurrSamplesPerSec=5.717756665674356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:30:52,266] [INFO] [timer.py:197:stop] 0/5434, RunningAvgSamplesPerSec=6.332420587194725, CurrSamplesPerSec=5.7336568363246085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:31:03,530] [INFO] [timer.py:197:stop] 0/5436, RunningAvgSamplesPerSec=6.3324257712121925, CurrSamplesPerSec=5.696499365064274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:31:14,767] [INFO] [timer.py:197:stop] 0/5438, RunningAvgSamplesPerSec=6.332444231804994, CurrSamplesPerSec=5.7353971483628605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:31:26,034] [INFO] [logging.py:68:log_dist] [Rank 0] step=2720, skipped=5, lr=[5.0800000000000005e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:31:26,036] [INFO] [timer.py:197:stop] 0/5440, RunningAvgSamplesPerSec=6.3324517740998765, CurrSamplesPerSec=5.721149797514245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:31:37,312] [INFO] [timer.py:197:stop] 0/5442, RunningAvgSamplesPerSec=6.332461417845187, CurrSamplesPerSec=5.694489497472948, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:31:48,559] [INFO] [timer.py:197:stop] 0/5444, RunningAvgSamplesPerSec=6.332477879576189, CurrSamplesPerSec=5.737828443549427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:31:59,873] [INFO] [timer.py:197:stop] 0/5446, RunningAvgSamplesPerSec=6.332479712229146, CurrSamplesPerSec=5.698187434566532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:32:11,115] [INFO] [timer.py:197:stop] 0/5448, RunningAvgSamplesPerSec=6.332497359605725, CurrSamplesPerSec=5.72765410016634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:32:22,609] [INFO] [timer.py:197:stop] 0/5450, RunningAvgSamplesPerSec=6.332510747841616, CurrSamplesPerSec=5.726563199659353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.001, 'learning_rate': 5.06888888888889e-06, 'epoch': 11.55} +[2022-12-17 05:32:33,869] [INFO] [timer.py:197:stop] 0/5452, RunningAvgSamplesPerSec=6.332524405237324, CurrSamplesPerSec=5.726556114074656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:32:45,116] [INFO] [timer.py:197:stop] 0/5454, RunningAvgSamplesPerSec=6.332536431214967, CurrSamplesPerSec=5.723373772312986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:32:56,506] [INFO] [timer.py:197:stop] 0/5456, RunningAvgSamplesPerSec=6.332548455978119, CurrSamplesPerSec=5.7462179403283145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:33:07,765] [INFO] [timer.py:197:stop] 0/5458, RunningAvgSamplesPerSec=6.332557599737585, CurrSamplesPerSec=5.7280026698697135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:33:19,023] [INFO] [logging.py:68:log_dist] [Rank 0] step=2730, skipped=5, lr=[5.057777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:33:19,025] [INFO] [timer.py:197:stop] 0/5460, RunningAvgSamplesPerSec=6.332571210787725, CurrSamplesPerSec=5.732027736787424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:33:30,266] [INFO] [timer.py:197:stop] 0/5462, RunningAvgSamplesPerSec=6.332581482932561, CurrSamplesPerSec=5.711506667941586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:33:41,533] [INFO] [timer.py:197:stop] 0/5464, RunningAvgSamplesPerSec=6.332593738459713, CurrSamplesPerSec=5.729059148948412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:33:52,804] [INFO] [timer.py:197:stop] 0/5466, RunningAvgSamplesPerSec=6.332604211069591, CurrSamplesPerSec=5.727068031611117, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:34:04,060] [INFO] [timer.py:197:stop] 0/5468, RunningAvgSamplesPerSec=6.3326183044704, CurrSamplesPerSec=5.736048413717722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:34:15,314] [INFO] [timer.py:197:stop] 0/5470, RunningAvgSamplesPerSec=6.3326320905877935, CurrSamplesPerSec=5.743125526686731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:34:26,583] [INFO] [timer.py:197:stop] 0/5472, RunningAvgSamplesPerSec=6.332643210929138, CurrSamplesPerSec=5.72707291909308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:34:37,832] [INFO] [timer.py:197:stop] 0/5474, RunningAvgSamplesPerSec=6.332658774956965, CurrSamplesPerSec=5.7351113932424616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:34:49,079] [INFO] [timer.py:197:stop] 0/5476, RunningAvgSamplesPerSec=6.332671219062701, CurrSamplesPerSec=5.729178977936781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:35:00,334] [INFO] [timer.py:197:stop] 0/5478, RunningAvgSamplesPerSec=6.332685789743696, CurrSamplesPerSec=5.738412545745183, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:35:11,572] [INFO] [logging.py:68:log_dist] [Rank 0] step=2740, skipped=5, lr=[5.035555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:35:11,574] [INFO] [timer.py:197:stop] 0/5480, RunningAvgSamplesPerSec=6.332703090955735, CurrSamplesPerSec=5.726971749917317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:35:22,856] [INFO] [timer.py:197:stop] 0/5482, RunningAvgSamplesPerSec=6.3327110287281245, CurrSamplesPerSec=5.710764253771904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:35:34,087] [INFO] [timer.py:197:stop] 0/5484, RunningAvgSamplesPerSec=6.332726896523725, CurrSamplesPerSec=5.744172104506499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:35:45,344] [INFO] [timer.py:197:stop] 0/5486, RunningAvgSamplesPerSec=6.332737779608204, CurrSamplesPerSec=5.7382054837343786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:35:56,769] [INFO] [timer.py:197:stop] 0/5488, RunningAvgSamplesPerSec=6.332749506085508, CurrSamplesPerSec=5.738857387065375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:36:08,018] [INFO] [timer.py:197:stop] 0/5490, RunningAvgSamplesPerSec=6.332762552584889, CurrSamplesPerSec=5.727755782316844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:36:19,293] [INFO] [timer.py:197:stop] 0/5492, RunningAvgSamplesPerSec=6.332772741523956, CurrSamplesPerSec=5.733351171866924, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:36:30,595] [INFO] [timer.py:197:stop] 0/5494, RunningAvgSamplesPerSec=6.332773836264852, CurrSamplesPerSec=5.676408612249814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:36:41,894] [INFO] [timer.py:197:stop] 0/5496, RunningAvgSamplesPerSec=6.332779233746077, CurrSamplesPerSec=5.698465892839091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:36:53,225] [INFO] [timer.py:197:stop] 0/5498, RunningAvgSamplesPerSec=6.332780467389881, CurrSamplesPerSec=5.677425009408602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:37:04,548] [INFO] [logging.py:68:log_dist] [Rank 0] step=2750, skipped=5, lr=[5.013333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:37:04,550] [INFO] [timer.py:197:stop] 0/5500, RunningAvgSamplesPerSec=6.33278092368322, CurrSamplesPerSec=5.686835714271792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 5.013333333333333e-06, 'epoch': 11.65} +[2022-12-17 05:37:15,905] [INFO] [timer.py:197:stop] 0/5502, RunningAvgSamplesPerSec=6.332783612977782, CurrSamplesPerSec=5.701471877168966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:37:27,211] [INFO] [timer.py:197:stop] 0/5504, RunningAvgSamplesPerSec=6.332793342968272, CurrSamplesPerSec=5.712175856621704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:37:38,561] [INFO] [timer.py:197:stop] 0/5506, RunningAvgSamplesPerSec=6.332789638900559, CurrSamplesPerSec=5.665064984738845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:37:49,847] [INFO] [timer.py:197:stop] 0/5508, RunningAvgSamplesPerSec=6.332797026608971, CurrSamplesPerSec=5.710434299877502, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:38:01,243] [INFO] [timer.py:197:stop] 0/5510, RunningAvgSamplesPerSec=6.3328017604467, CurrSamplesPerSec=5.685818353795128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:38:12,604] [INFO] [timer.py:197:stop] 0/5512, RunningAvgSamplesPerSec=6.3328063775165155, CurrSamplesPerSec=5.723522652006832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:38:23,903] [INFO] [timer.py:197:stop] 0/5514, RunningAvgSamplesPerSec=6.33281705103715, CurrSamplesPerSec=5.715605203494661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:38:35,238] [INFO] [timer.py:197:stop] 0/5516, RunningAvgSamplesPerSec=6.332823395964448, CurrSamplesPerSec=5.708910398023532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:38:46,529] [INFO] [timer.py:197:stop] 0/5518, RunningAvgSamplesPerSec=6.33282605944014, CurrSamplesPerSec=5.6991516306995935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:38:57,861] [INFO] [logging.py:68:log_dist] [Rank 0] step=2760, skipped=5, lr=[4.991111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:38:57,863] [INFO] [timer.py:197:stop] 0/5520, RunningAvgSamplesPerSec=6.33282949171402, CurrSamplesPerSec=5.710830589312635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:39:09,216] [INFO] [timer.py:197:stop] 0/5522, RunningAvgSamplesPerSec=6.332837247559131, CurrSamplesPerSec=5.6983633125161095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:39:20,443] [INFO] [timer.py:197:stop] 0/5524, RunningAvgSamplesPerSec=6.332858539457194, CurrSamplesPerSec=5.745685130183153, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:39:31,700] [INFO] [timer.py:197:stop] 0/5526, RunningAvgSamplesPerSec=6.332865922446157, CurrSamplesPerSec=5.710178478218908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:39:42,975] [INFO] [timer.py:197:stop] 0/5528, RunningAvgSamplesPerSec=6.332877851007105, CurrSamplesPerSec=5.728147390091605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:39:54,237] [INFO] [timer.py:197:stop] 0/5530, RunningAvgSamplesPerSec=6.332890099141983, CurrSamplesPerSec=5.72886621035573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:40:05,451] [INFO] [timer.py:197:stop] 0/5532, RunningAvgSamplesPerSec=6.332902675261394, CurrSamplesPerSec=5.730549542190849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:40:16,803] [INFO] [timer.py:197:stop] 0/5534, RunningAvgSamplesPerSec=6.33290837551137, CurrSamplesPerSec=5.707021102686406, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:40:28,091] [INFO] [timer.py:197:stop] 0/5536, RunningAvgSamplesPerSec=6.332914417331108, CurrSamplesPerSec=5.70401605311191, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:40:39,392] [INFO] [timer.py:197:stop] 0/5538, RunningAvgSamplesPerSec=6.33291884533572, CurrSamplesPerSec=5.687822104017034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:40:50,946] [INFO] [logging.py:68:log_dist] [Rank 0] step=2770, skipped=5, lr=[4.968888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:40:50,948] [INFO] [timer.py:197:stop] 0/5540, RunningAvgSamplesPerSec=6.332925819119135, CurrSamplesPerSec=5.72190881880358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:41:02,276] [INFO] [timer.py:197:stop] 0/5542, RunningAvgSamplesPerSec=6.3329356245064075, CurrSamplesPerSec=5.722274254974737, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:41:13,559] [INFO] [timer.py:197:stop] 0/5544, RunningAvgSamplesPerSec=6.332943514845411, CurrSamplesPerSec=5.684454414219593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:41:24,945] [INFO] [timer.py:197:stop] 0/5546, RunningAvgSamplesPerSec=6.332950889628434, CurrSamplesPerSec=5.713319163448986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:41:36,237] [INFO] [timer.py:197:stop] 0/5548, RunningAvgSamplesPerSec=6.332957682383578, CurrSamplesPerSec=5.703936058737852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:41:47,562] [INFO] [timer.py:197:stop] 0/5550, RunningAvgSamplesPerSec=6.332956047074123, CurrSamplesPerSec=5.685354242281351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 4.957777777777778e-06, 'epoch': 11.76} +[2022-12-17 05:41:58,980] [INFO] [timer.py:197:stop] 0/5552, RunningAvgSamplesPerSec=6.332966331820771, CurrSamplesPerSec=5.733141536241398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:42:10,228] [INFO] [timer.py:197:stop] 0/5554, RunningAvgSamplesPerSec=6.332978090415024, CurrSamplesPerSec=5.718462160113856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:42:21,520] [INFO] [timer.py:197:stop] 0/5556, RunningAvgSamplesPerSec=6.332983898220038, CurrSamplesPerSec=5.697712836015588, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:42:32,801] [INFO] [timer.py:197:stop] 0/5558, RunningAvgSamplesPerSec=6.332992313775625, CurrSamplesPerSec=5.715544841767603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:42:44,184] [INFO] [logging.py:68:log_dist] [Rank 0] step=2780, skipped=5, lr=[4.946666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:42:44,186] [INFO] [timer.py:197:stop] 0/5560, RunningAvgSamplesPerSec=6.332978351255422, CurrSamplesPerSec=5.627949114964652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:42:55,885] [INFO] [timer.py:197:stop] 0/5562, RunningAvgSamplesPerSec=6.332894527720718, CurrSamplesPerSec=5.328057955897367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:43:07,213] [INFO] [timer.py:197:stop] 0/5564, RunningAvgSamplesPerSec=6.332891784684995, CurrSamplesPerSec=5.661491093777932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:43:18,518] [INFO] [timer.py:197:stop] 0/5566, RunningAvgSamplesPerSec=6.332894393890626, CurrSamplesPerSec=5.710761823927876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:43:29,902] [INFO] [timer.py:197:stop] 0/5568, RunningAvgSamplesPerSec=6.332878696942575, CurrSamplesPerSec=5.613556043012562, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:43:41,167] [INFO] [timer.py:197:stop] 0/5570, RunningAvgSamplesPerSec=6.332889244247719, CurrSamplesPerSec=5.7199923851472185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:43:52,430] [INFO] [timer.py:197:stop] 0/5572, RunningAvgSamplesPerSec=6.332901017021467, CurrSamplesPerSec=5.740034966821891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:44:03,768] [INFO] [timer.py:197:stop] 0/5574, RunningAvgSamplesPerSec=6.332896168709029, CurrSamplesPerSec=5.663117615475737, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:44:15,088] [INFO] [timer.py:197:stop] 0/5576, RunningAvgSamplesPerSec=6.332895772682674, CurrSamplesPerSec=5.692906722591734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:44:26,629] [INFO] [timer.py:197:stop] 0/5578, RunningAvgSamplesPerSec=6.332905969270301, CurrSamplesPerSec=5.718397839872948, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:44:38,107] [INFO] [logging.py:68:log_dist] [Rank 0] step=2790, skipped=5, lr=[4.924444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:44:38,108] [INFO] [timer.py:197:stop] 0/5580, RunningAvgSamplesPerSec=6.332897487884689, CurrSamplesPerSec=5.719037939441172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:44:49,419] [INFO] [timer.py:197:stop] 0/5582, RunningAvgSamplesPerSec=6.332897838621812, CurrSamplesPerSec=5.70153751260587, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:45:00,687] [INFO] [timer.py:197:stop] 0/5584, RunningAvgSamplesPerSec=6.3329080849694765, CurrSamplesPerSec=5.710074018225663, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:45:12,195] [INFO] [timer.py:197:stop] 0/5586, RunningAvgSamplesPerSec=6.332903922957749, CurrSamplesPerSec=5.720801817491398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:45:23,531] [INFO] [timer.py:197:stop] 0/5588, RunningAvgSamplesPerSec=6.332901741022134, CurrSamplesPerSec=5.674606021061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:45:34,856] [INFO] [timer.py:197:stop] 0/5590, RunningAvgSamplesPerSec=6.332897804579388, CurrSamplesPerSec=5.681754473526077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:45:46,217] [INFO] [timer.py:197:stop] 0/5592, RunningAvgSamplesPerSec=6.33288074159132, CurrSamplesPerSec=5.6802431940249365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:45:57,527] [INFO] [timer.py:197:stop] 0/5594, RunningAvgSamplesPerSec=6.3328827302159905, CurrSamplesPerSec=5.692431554689089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:46:08,808] [INFO] [timer.py:197:stop] 0/5596, RunningAvgSamplesPerSec=6.332891171460052, CurrSamplesPerSec=5.714777044771578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:46:20,139] [INFO] [timer.py:197:stop] 0/5598, RunningAvgSamplesPerSec=6.332887723565751, CurrSamplesPerSec=5.698770752387616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:46:31,417] [INFO] [logging.py:68:log_dist] [Rank 0] step=2800, skipped=5, lr=[4.902222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:46:31,419] [INFO] [timer.py:197:stop] 0/5600, RunningAvgSamplesPerSec=6.332889171708729, CurrSamplesPerSec=5.688668025550359, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0017, 'learning_rate': 4.902222222222222e-06, 'epoch': 11.86} +[2022-12-17 05:46:42,751] [INFO] [timer.py:197:stop] 0/5602, RunningAvgSamplesPerSec=6.332892828513426, CurrSamplesPerSec=5.702749981251817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:46:54,210] [INFO] [timer.py:197:stop] 0/5604, RunningAvgSamplesPerSec=6.332901936235582, CurrSamplesPerSec=5.735084681740569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:47:05,700] [INFO] [timer.py:197:stop] 0/5606, RunningAvgSamplesPerSec=6.33290293370556, CurrSamplesPerSec=5.691225158507459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:47:17,388] [INFO] [timer.py:197:stop] 0/5608, RunningAvgSamplesPerSec=6.332820121132463, CurrSamplesPerSec=5.3429333136735115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:47:28,766] [INFO] [timer.py:197:stop] 0/5610, RunningAvgSamplesPerSec=6.332821523370211, CurrSamplesPerSec=5.702558326024797, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:47:40,296] [INFO] [timer.py:197:stop] 0/5612, RunningAvgSamplesPerSec=6.332829560330361, CurrSamplesPerSec=5.71075842214971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:47:51,692] [INFO] [timer.py:197:stop] 0/5614, RunningAvgSamplesPerSec=6.332812496434912, CurrSamplesPerSec=5.608840873003154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:48:03,251] [INFO] [timer.py:197:stop] 0/5616, RunningAvgSamplesPerSec=6.332808657762249, CurrSamplesPerSec=5.6850399804175815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:48:14,569] [INFO] [timer.py:197:stop] 0/5618, RunningAvgSamplesPerSec=6.332807418728847, CurrSamplesPerSec=5.673635721564932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:48:25,871] [INFO] [logging.py:68:log_dist] [Rank 0] step=2810, skipped=5, lr=[4.880000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:48:25,872] [INFO] [timer.py:197:stop] 0/5620, RunningAvgSamplesPerSec=6.332805826090633, CurrSamplesPerSec=5.68405455531197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:48:37,425] [INFO] [timer.py:197:stop] 0/5622, RunningAvgSamplesPerSec=6.332799703924759, CurrSamplesPerSec=5.670917772159438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:48:48,976] [INFO] [timer.py:197:stop] 0/5624, RunningAvgSamplesPerSec=6.332805524442007, CurrSamplesPerSec=5.699043943930853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:49:00,253] [INFO] [timer.py:197:stop] 0/5626, RunningAvgSamplesPerSec=6.332806477016322, CurrSamplesPerSec=5.683839843942882, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:49:11,510] [INFO] [timer.py:197:stop] 0/5628, RunningAvgSamplesPerSec=6.3328085207659495, CurrSamplesPerSec=5.6995364324043365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:49:22,835] [INFO] [timer.py:197:stop] 0/5630, RunningAvgSamplesPerSec=6.332809979530304, CurrSamplesPerSec=5.70279868456956, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:49:34,163] [INFO] [timer.py:197:stop] 0/5632, RunningAvgSamplesPerSec=6.33280764518486, CurrSamplesPerSec=5.694752613713792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:49:45,517] [INFO] [timer.py:197:stop] 0/5634, RunningAvgSamplesPerSec=6.332799728337538, CurrSamplesPerSec=5.660992264487115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:49:56,825] [INFO] [timer.py:197:stop] 0/5636, RunningAvgSamplesPerSec=6.332801426210607, CurrSamplesPerSec=5.695056109472525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:50:08,283] [INFO] [timer.py:197:stop] 0/5638, RunningAvgSamplesPerSec=6.332765862833, CurrSamplesPerSec=5.530515181442866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:50:19,615] [INFO] [logging.py:68:log_dist] [Rank 0] step=2820, skipped=5, lr=[4.857777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:50:19,616] [INFO] [timer.py:197:stop] 0/5640, RunningAvgSamplesPerSec=6.33276122207884, CurrSamplesPerSec=5.680091028575591, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:50:30,917] [INFO] [timer.py:197:stop] 0/5642, RunningAvgSamplesPerSec=6.332763728625373, CurrSamplesPerSec=5.711915503414802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:50:42,205] [INFO] [timer.py:197:stop] 0/5644, RunningAvgSamplesPerSec=6.332769598022131, CurrSamplesPerSec=5.701877339420423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:50:53,471] [INFO] [timer.py:197:stop] 0/5646, RunningAvgSamplesPerSec=6.332780589594118, CurrSamplesPerSec=5.726134919783692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:51:04,789] [INFO] [timer.py:197:stop] 0/5648, RunningAvgSamplesPerSec=6.332779730197156, CurrSamplesPerSec=5.677553735791651, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:51:16,212] [INFO] [timer.py:197:stop] 0/5650, RunningAvgSamplesPerSec=6.332758376727921, CurrSamplesPerSec=5.638280024107815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0011, 'learning_rate': 4.846666666666667e-06, 'epoch': 11.97} +[2022-12-17 05:51:27,493] [INFO] [timer.py:197:stop] 0/5652, RunningAvgSamplesPerSec=6.332766430333524, CurrSamplesPerSec=5.727767270671415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:51:38,883] [INFO] [timer.py:197:stop] 0/5654, RunningAvgSamplesPerSec=6.332771971674966, CurrSamplesPerSec=5.71815494655829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:51:50,324] [INFO] [timer.py:197:stop] 0/5656, RunningAvgSamplesPerSec=6.332751991785921, CurrSamplesPerSec=5.691306486070565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:52:01,597] [INFO] [timer.py:197:stop] 0/5658, RunningAvgSamplesPerSec=6.332761303218689, CurrSamplesPerSec=5.726962708401693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:52:12,901] [INFO] [logging.py:68:log_dist] [Rank 0] step=2830, skipped=5, lr=[4.835555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:52:12,902] [INFO] [timer.py:197:stop] 0/5660, RunningAvgSamplesPerSec=6.332763278259856, CurrSamplesPerSec=5.6826154326731615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:52:24,257] [INFO] [timer.py:197:stop] 0/5662, RunningAvgSamplesPerSec=6.332754392749188, CurrSamplesPerSec=5.706142058898655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:52:32,752] [INFO] [timer.py:197:stop] 0/5664, RunningAvgSamplesPerSec=6.333304594674231, CurrSamplesPerSec=10.171249085693656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:52:44,051] [INFO] [timer.py:197:stop] 0/5666, RunningAvgSamplesPerSec=6.333307790781571, CurrSamplesPerSec=5.710589310286816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:52:55,435] [INFO] [timer.py:197:stop] 0/5668, RunningAvgSamplesPerSec=6.333290583434095, CurrSamplesPerSec=5.666261032287638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:53:06,766] [INFO] [timer.py:197:stop] 0/5670, RunningAvgSamplesPerSec=6.33328694885843, CurrSamplesPerSec=5.692666714254806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:53:18,101] [INFO] [timer.py:197:stop] 0/5672, RunningAvgSamplesPerSec=6.333282186189515, CurrSamplesPerSec=5.670440039003418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:53:29,691] [INFO] [timer.py:197:stop] 0/5674, RunningAvgSamplesPerSec=6.333221424987277, CurrSamplesPerSec=5.713069892305831, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:53:41,004] [INFO] [timer.py:197:stop] 0/5676, RunningAvgSamplesPerSec=6.333218738932709, CurrSamplesPerSec=5.699126221095192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:53:52,433] [INFO] [timer.py:197:stop] 0/5678, RunningAvgSamplesPerSec=6.3332006328558315, CurrSamplesPerSec=5.607977987206557, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:54:03,976] [INFO] [logging.py:68:log_dist] [Rank 0] step=2840, skipped=5, lr=[4.8133333333333336e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:54:03,978] [INFO] [timer.py:197:stop] 0/5680, RunningAvgSamplesPerSec=6.333199732486365, CurrSamplesPerSec=5.711543125327429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:54:15,293] [INFO] [timer.py:197:stop] 0/5682, RunningAvgSamplesPerSec=6.333195939860297, CurrSamplesPerSec=5.6958679262802505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:54:26,666] [INFO] [timer.py:197:stop] 0/5684, RunningAvgSamplesPerSec=6.33318316638135, CurrSamplesPerSec=5.636633885696688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:54:37,975] [INFO] [timer.py:197:stop] 0/5686, RunningAvgSamplesPerSec=6.3331854938121515, CurrSamplesPerSec=5.702362565085231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:54:49,305] [INFO] [timer.py:197:stop] 0/5688, RunningAvgSamplesPerSec=6.333182590483902, CurrSamplesPerSec=5.704248049485593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:55:00,779] [INFO] [timer.py:197:stop] 0/5690, RunningAvgSamplesPerSec=6.33314737555965, CurrSamplesPerSec=5.53050811692653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:55:12,113] [INFO] [timer.py:197:stop] 0/5692, RunningAvgSamplesPerSec=6.333142736354172, CurrSamplesPerSec=5.687230903873154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:55:23,408] [INFO] [timer.py:197:stop] 0/5694, RunningAvgSamplesPerSec=6.333146815189666, CurrSamplesPerSec=5.720327101529128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:55:35,018] [INFO] [timer.py:197:stop] 0/5696, RunningAvgSamplesPerSec=6.333081745735081, CurrSamplesPerSec=5.42185939073298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:55:46,331] [INFO] [timer.py:197:stop] 0/5698, RunningAvgSamplesPerSec=6.3330818798007895, CurrSamplesPerSec=5.698708810029422, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:55:57,840] [INFO] [logging.py:68:log_dist] [Rank 0] step=2850, skipped=5, lr=[4.791111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:55:57,841] [INFO] [timer.py:197:stop] 0/5700, RunningAvgSamplesPerSec=6.333078349179951, CurrSamplesPerSec=5.692917347170979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0008, 'learning_rate': 4.791111111111111e-06, 'epoch': 12.08} +[2022-12-17 05:56:09,228] [INFO] [timer.py:197:stop] 0/5702, RunningAvgSamplesPerSec=6.333059736306311, CurrSamplesPerSec=5.597479915333243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:56:20,516] [INFO] [timer.py:197:stop] 0/5704, RunningAvgSamplesPerSec=6.333065159279361, CurrSamplesPerSec=5.715673598766464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:56:31,823] [INFO] [timer.py:197:stop] 0/5706, RunningAvgSamplesPerSec=6.333066730706159, CurrSamplesPerSec=5.701342790478605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:56:43,423] [INFO] [timer.py:197:stop] 0/5708, RunningAvgSamplesPerSec=6.333056047616498, CurrSamplesPerSec=5.715045689417052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:56:54,704] [INFO] [timer.py:197:stop] 0/5710, RunningAvgSamplesPerSec=6.33306372097821, CurrSamplesPerSec=5.709811184638263, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:57:06,048] [INFO] [timer.py:197:stop] 0/5712, RunningAvgSamplesPerSec=6.333071282850042, CurrSamplesPerSec=5.705976131253787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:57:17,368] [INFO] [timer.py:197:stop] 0/5714, RunningAvgSamplesPerSec=6.33306681450364, CurrSamplesPerSec=5.652331112271843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:57:28,654] [INFO] [timer.py:197:stop] 0/5716, RunningAvgSamplesPerSec=6.3330734652637615, CurrSamplesPerSec=5.707509874027401, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:57:40,257] [INFO] [timer.py:197:stop] 0/5718, RunningAvgSamplesPerSec=6.3330681573262, CurrSamplesPerSec=5.672987041388273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:57:51,815] [INFO] [logging.py:68:log_dist] [Rank 0] step=2860, skipped=5, lr=[4.768888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:57:51,817] [INFO] [timer.py:197:stop] 0/5720, RunningAvgSamplesPerSec=6.333063883374338, CurrSamplesPerSec=5.687774138233812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:58:03,114] [INFO] [timer.py:197:stop] 0/5722, RunningAvgSamplesPerSec=6.333068138037818, CurrSamplesPerSec=5.7160036715575195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:58:14,417] [INFO] [timer.py:197:stop] 0/5724, RunningAvgSamplesPerSec=6.3330711810578535, CurrSamplesPerSec=5.703349017171229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:58:25,784] [INFO] [timer.py:197:stop] 0/5726, RunningAvgSamplesPerSec=6.3330600285572825, CurrSamplesPerSec=5.726510424690953, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:58:37,095] [INFO] [timer.py:197:stop] 0/5728, RunningAvgSamplesPerSec=6.3330607107411065, CurrSamplesPerSec=5.6806035677858535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:58:48,461] [INFO] [timer.py:197:stop] 0/5730, RunningAvgSamplesPerSec=6.333046162093266, CurrSamplesPerSec=5.624621524913426, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:58:59,989] [INFO] [timer.py:197:stop] 0/5732, RunningAvgSamplesPerSec=6.333047457967725, CurrSamplesPerSec=5.699362660168836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:59:11,345] [INFO] [timer.py:197:stop] 0/5734, RunningAvgSamplesPerSec=6.333049646595997, CurrSamplesPerSec=5.7098857569589505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:59:22,670] [INFO] [timer.py:197:stop] 0/5736, RunningAvgSamplesPerSec=6.3330471630973975, CurrSamplesPerSec=5.656524632617846, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:59:34,092] [INFO] [timer.py:197:stop] 0/5738, RunningAvgSamplesPerSec=6.333058066751148, CurrSamplesPerSec=5.725679347229084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:59:45,411] [INFO] [logging.py:68:log_dist] [Rank 0] step=2870, skipped=5, lr=[4.746666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 05:59:45,413] [INFO] [timer.py:197:stop] 0/5740, RunningAvgSamplesPerSec=6.333056508201623, CurrSamplesPerSec=5.693592330700572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 05:59:56,699] [INFO] [timer.py:197:stop] 0/5742, RunningAvgSamplesPerSec=6.333056869621617, CurrSamplesPerSec=5.682889724245197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:00:07,969] [INFO] [timer.py:197:stop] 0/5744, RunningAvgSamplesPerSec=6.3330669801920125, CurrSamplesPerSec=5.732264465585629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:00:19,237] [INFO] [timer.py:197:stop] 0/5746, RunningAvgSamplesPerSec=6.333077193576914, CurrSamplesPerSec=5.734636504753801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:00:30,764] [INFO] [timer.py:197:stop] 0/5748, RunningAvgSamplesPerSec=6.333027780024121, CurrSamplesPerSec=5.457497362896864, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:00:42,080] [INFO] [timer.py:197:stop] 0/5750, RunningAvgSamplesPerSec=6.333026353581669, CurrSamplesPerSec=5.670943410065879, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0007, 'learning_rate': 4.735555555555556e-06, 'epoch': 12.18} +[2022-12-17 06:00:53,386] [INFO] [timer.py:197:stop] 0/5752, RunningAvgSamplesPerSec=6.333034645889236, CurrSamplesPerSec=5.7249857459432505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:01:04,646] [INFO] [timer.py:197:stop] 0/5754, RunningAvgSamplesPerSec=6.333043290240301, CurrSamplesPerSec=5.723611739462003, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:01:15,937] [INFO] [timer.py:197:stop] 0/5756, RunningAvgSamplesPerSec=6.333048192210894, CurrSamplesPerSec=5.7072176689809595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:01:27,234] [INFO] [timer.py:197:stop] 0/5758, RunningAvgSamplesPerSec=6.333051890604268, CurrSamplesPerSec=5.698058980366412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:01:38,543] [INFO] [logging.py:68:log_dist] [Rank 0] step=2880, skipped=5, lr=[4.724444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:01:38,545] [INFO] [timer.py:197:stop] 0/5760, RunningAvgSamplesPerSec=6.333052965722199, CurrSamplesPerSec=5.686170522167649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:01:49,806] [INFO] [timer.py:197:stop] 0/5762, RunningAvgSamplesPerSec=6.333061178432799, CurrSamplesPerSec=5.7263543044379785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:02:01,134] [INFO] [timer.py:197:stop] 0/5764, RunningAvgSamplesPerSec=6.333058222287947, CurrSamplesPerSec=5.685659145615068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:02:12,459] [INFO] [timer.py:197:stop] 0/5766, RunningAvgSamplesPerSec=6.333056332696115, CurrSamplesPerSec=5.68130040403441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:02:23,762] [INFO] [timer.py:197:stop] 0/5768, RunningAvgSamplesPerSec=6.33305904526347, CurrSamplesPerSec=5.6914582876511375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:02:35,061] [INFO] [timer.py:197:stop] 0/5770, RunningAvgSamplesPerSec=6.333062133887484, CurrSamplesPerSec=5.714090461007016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:02:46,746] [INFO] [timer.py:197:stop] 0/5772, RunningAvgSamplesPerSec=6.332981545034309, CurrSamplesPerSec=5.330093869140605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:02:58,054] [INFO] [timer.py:197:stop] 0/5774, RunningAvgSamplesPerSec=6.332982922697146, CurrSamplesPerSec=5.684175638494878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:03:09,529] [INFO] [timer.py:197:stop] 0/5776, RunningAvgSamplesPerSec=6.332982962694763, CurrSamplesPerSec=5.70664256871121, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:03:20,858] [INFO] [timer.py:197:stop] 0/5778, RunningAvgSamplesPerSec=6.332978596550738, CurrSamplesPerSec=5.674524450355167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:03:32,169] [INFO] [logging.py:68:log_dist] [Rank 0] step=2890, skipped=5, lr=[4.7022222222222225e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:03:32,170] [INFO] [timer.py:197:stop] 0/5780, RunningAvgSamplesPerSec=6.332980005918211, CurrSamplesPerSec=5.709917092489247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:03:43,748] [INFO] [timer.py:197:stop] 0/5782, RunningAvgSamplesPerSec=6.33297547290769, CurrSamplesPerSec=5.677242016650395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:03:55,179] [INFO] [timer.py:197:stop] 0/5784, RunningAvgSamplesPerSec=6.332969718676225, CurrSamplesPerSec=5.670225157206506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:04:06,469] [INFO] [timer.py:197:stop] 0/5786, RunningAvgSamplesPerSec=6.33297987413485, CurrSamplesPerSec=5.703800073451162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:04:17,960] [INFO] [timer.py:197:stop] 0/5788, RunningAvgSamplesPerSec=6.332987330191376, CurrSamplesPerSec=5.696377756282663, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:04:29,355] [INFO] [timer.py:197:stop] 0/5790, RunningAvgSamplesPerSec=6.332994329889643, CurrSamplesPerSec=5.71702041843999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:04:40,697] [INFO] [timer.py:197:stop] 0/5792, RunningAvgSamplesPerSec=6.3329883259810735, CurrSamplesPerSec=5.636968860406683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:04:51,944] [INFO] [timer.py:197:stop] 0/5794, RunningAvgSamplesPerSec=6.333002093160635, CurrSamplesPerSec=5.73800064443515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:05:03,355] [INFO] [timer.py:197:stop] 0/5796, RunningAvgSamplesPerSec=6.332997662056815, CurrSamplesPerSec=5.709971991224141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:05:14,590] [INFO] [timer.py:197:stop] 0/5798, RunningAvgSamplesPerSec=6.3330116405150365, CurrSamplesPerSec=5.737638347737378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:05:26,187] [INFO] [logging.py:68:log_dist] [Rank 0] step=2900, skipped=5, lr=[4.680000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:05:26,189] [INFO] [timer.py:197:stop] 0/5800, RunningAvgSamplesPerSec=6.333014348129561, CurrSamplesPerSec=5.693146992655114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0008, 'learning_rate': 4.680000000000001e-06, 'epoch': 12.29} +[2022-12-17 06:05:37,429] [INFO] [timer.py:197:stop] 0/5802, RunningAvgSamplesPerSec=6.333029910069844, CurrSamplesPerSec=5.731119927273069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:05:48,682] [INFO] [timer.py:197:stop] 0/5804, RunningAvgSamplesPerSec=6.333035842662751, CurrSamplesPerSec=5.704081019890971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:06:00,097] [INFO] [timer.py:197:stop] 0/5806, RunningAvgSamplesPerSec=6.333042504696337, CurrSamplesPerSec=5.712226909053271, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:06:11,623] [INFO] [timer.py:197:stop] 0/5808, RunningAvgSamplesPerSec=6.333043987856465, CurrSamplesPerSec=5.68961477239248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:06:22,901] [INFO] [timer.py:197:stop] 0/5810, RunningAvgSamplesPerSec=6.33304796554682, CurrSamplesPerSec=5.692458594636181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:06:34,200] [INFO] [timer.py:197:stop] 0/5812, RunningAvgSamplesPerSec=6.333053093884182, CurrSamplesPerSec=5.704069383941154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:06:45,832] [INFO] [timer.py:197:stop] 0/5814, RunningAvgSamplesPerSec=6.333036366855897, CurrSamplesPerSec=5.698952715852549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:06:57,143] [INFO] [timer.py:197:stop] 0/5816, RunningAvgSamplesPerSec=6.333036955464624, CurrSamplesPerSec=5.704274232079576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:07:08,666] [INFO] [timer.py:197:stop] 0/5818, RunningAvgSamplesPerSec=6.33303540294264, CurrSamplesPerSec=5.693990392427025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:07:20,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=2910, skipped=5, lr=[4.6577777777777785e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:07:20,169] [INFO] [timer.py:197:stop] 0/5820, RunningAvgSamplesPerSec=6.333035206874964, CurrSamplesPerSec=5.698076155639671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:07:31,475] [INFO] [timer.py:197:stop] 0/5822, RunningAvgSamplesPerSec=6.333036966743757, CurrSamplesPerSec=5.705704457700638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:07:42,769] [INFO] [timer.py:197:stop] 0/5824, RunningAvgSamplesPerSec=6.333041189439793, CurrSamplesPerSec=5.708153121779355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:07:54,295] [INFO] [timer.py:197:stop] 0/5826, RunningAvgSamplesPerSec=6.333039135741505, CurrSamplesPerSec=5.678836995791065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:08:05,618] [INFO] [timer.py:197:stop] 0/5828, RunningAvgSamplesPerSec=6.333037473781379, CurrSamplesPerSec=5.682891167955098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:08:16,973] [INFO] [timer.py:197:stop] 0/5830, RunningAvgSamplesPerSec=6.333024679570001, CurrSamplesPerSec=5.645582961104968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:08:28,288] [INFO] [timer.py:197:stop] 0/5832, RunningAvgSamplesPerSec=6.333023503984594, CurrSamplesPerSec=5.709940412176869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:08:39,578] [INFO] [timer.py:197:stop] 0/5834, RunningAvgSamplesPerSec=6.333028469659971, CurrSamplesPerSec=5.7119590155108995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:08:50,859] [INFO] [timer.py:197:stop] 0/5836, RunningAvgSamplesPerSec=6.333035625576971, CurrSamplesPerSec=5.6891756027981115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:09:02,335] [INFO] [timer.py:197:stop] 0/5838, RunningAvgSamplesPerSec=6.333039610848032, CurrSamplesPerSec=5.70028851881641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:09:13,596] [INFO] [logging.py:68:log_dist] [Rank 0] step=2920, skipped=5, lr=[4.635555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:09:13,598] [INFO] [timer.py:197:stop] 0/5840, RunningAvgSamplesPerSec=6.333048189129644, CurrSamplesPerSec=5.726195505402264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:09:24,852] [INFO] [timer.py:197:stop] 0/5842, RunningAvgSamplesPerSec=6.333057708895621, CurrSamplesPerSec=5.724060147746541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:09:36,268] [INFO] [timer.py:197:stop] 0/5844, RunningAvgSamplesPerSec=6.333058364260927, CurrSamplesPerSec=5.700625049162459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:09:47,565] [INFO] [timer.py:197:stop] 0/5846, RunningAvgSamplesPerSec=6.333062597156241, CurrSamplesPerSec=5.706827461725405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:09:58,850] [INFO] [timer.py:197:stop] 0/5848, RunningAvgSamplesPerSec=6.3330712101826085, CurrSamplesPerSec=5.722144956467561, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:10:10,241] [INFO] [timer.py:197:stop] 0/5850, RunningAvgSamplesPerSec=6.333076512104131, CurrSamplesPerSec=5.716131962346995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.001, 'learning_rate': 4.624444444444445e-06, 'epoch': 12.39} +[2022-12-17 06:10:21,549] [INFO] [timer.py:197:stop] 0/5852, RunningAvgSamplesPerSec=6.333077908595145, CurrSamplesPerSec=5.686055135248809, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:10:32,841] [INFO] [timer.py:197:stop] 0/5854, RunningAvgSamplesPerSec=6.333082986621699, CurrSamplesPerSec=5.695888472429112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:10:44,226] [INFO] [timer.py:197:stop] 0/5856, RunningAvgSamplesPerSec=6.333068188018257, CurrSamplesPerSec=5.685718877744978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:10:55,591] [INFO] [timer.py:197:stop] 0/5858, RunningAvgSamplesPerSec=6.333058188138381, CurrSamplesPerSec=5.6309342973964815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:11:06,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=2930, skipped=5, lr=[4.613333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:11:06,931] [INFO] [timer.py:197:stop] 0/5860, RunningAvgSamplesPerSec=6.333059374007877, CurrSamplesPerSec=5.7035758694456495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:11:18,215] [INFO] [timer.py:197:stop] 0/5862, RunningAvgSamplesPerSec=6.333066234612839, CurrSamplesPerSec=5.7189429022784335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:11:29,480] [INFO] [timer.py:197:stop] 0/5864, RunningAvgSamplesPerSec=6.333073388154316, CurrSamplesPerSec=5.700475421412082, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:11:40,883] [INFO] [timer.py:197:stop] 0/5866, RunningAvgSamplesPerSec=6.333075174763755, CurrSamplesPerSec=5.705023932007137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:11:52,463] [INFO] [timer.py:197:stop] 0/5868, RunningAvgSamplesPerSec=6.33308124388736, CurrSamplesPerSec=5.7109671530431925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:12:03,942] [INFO] [timer.py:197:stop] 0/5870, RunningAvgSamplesPerSec=6.3330868665001185, CurrSamplesPerSec=5.7183637312413875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:12:15,485] [INFO] [timer.py:197:stop] 0/5872, RunningAvgSamplesPerSec=6.333047419957166, CurrSamplesPerSec=5.499836215038959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:12:26,779] [INFO] [timer.py:197:stop] 0/5874, RunningAvgSamplesPerSec=6.333052248890246, CurrSamplesPerSec=5.716640313397472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:12:38,045] [INFO] [timer.py:197:stop] 0/5876, RunningAvgSamplesPerSec=6.333063104667585, CurrSamplesPerSec=5.7382314883322785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:12:49,591] [INFO] [timer.py:197:stop] 0/5878, RunningAvgSamplesPerSec=6.333007492183222, CurrSamplesPerSec=5.444904238732139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:13:00,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=2940, skipped=5, lr=[4.591111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:13:00,895] [INFO] [timer.py:197:stop] 0/5880, RunningAvgSamplesPerSec=6.333009913626235, CurrSamplesPerSec=5.695302844432262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:13:12,177] [INFO] [timer.py:197:stop] 0/5882, RunningAvgSamplesPerSec=6.333018136040713, CurrSamplesPerSec=5.718970438362116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:13:23,901] [INFO] [timer.py:197:stop] 0/5884, RunningAvgSamplesPerSec=6.333017939189659, CurrSamplesPerSec=5.704586259823864, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:13:35,218] [INFO] [timer.py:197:stop] 0/5886, RunningAvgSamplesPerSec=6.333018089438299, CurrSamplesPerSec=5.71238542051308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:13:46,725] [INFO] [timer.py:197:stop] 0/5888, RunningAvgSamplesPerSec=6.333024751875963, CurrSamplesPerSec=5.699916444826087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:13:58,297] [INFO] [timer.py:197:stop] 0/5890, RunningAvgSamplesPerSec=6.333027057896281, CurrSamplesPerSec=5.704002478150968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:14:09,601] [INFO] [timer.py:197:stop] 0/5892, RunningAvgSamplesPerSec=6.333029796735478, CurrSamplesPerSec=5.7104729302110275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:14:20,923] [INFO] [timer.py:197:stop] 0/5894, RunningAvgSamplesPerSec=6.333037543211003, CurrSamplesPerSec=5.718080401695344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:14:32,249] [INFO] [timer.py:197:stop] 0/5896, RunningAvgSamplesPerSec=6.333035956380963, CurrSamplesPerSec=5.708275476630145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:14:43,527] [INFO] [timer.py:197:stop] 0/5898, RunningAvgSamplesPerSec=6.3330439846194055, CurrSamplesPerSec=5.728124165914754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:14:54,832] [INFO] [logging.py:68:log_dist] [Rank 0] step=2950, skipped=5, lr=[4.568888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:14:54,834] [INFO] [timer.py:197:stop] 0/5900, RunningAvgSamplesPerSec=6.333046513333144, CurrSamplesPerSec=5.708106754545724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 4.568888888888889e-06, 'epoch': 12.5} +[2022-12-17 06:15:06,151] [INFO] [timer.py:197:stop] 0/5902, RunningAvgSamplesPerSec=6.333046281630883, CurrSamplesPerSec=5.713112206069512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:15:17,464] [INFO] [timer.py:197:stop] 0/5904, RunningAvgSamplesPerSec=6.333052801023202, CurrSamplesPerSec=5.71561007143143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:15:28,833] [INFO] [timer.py:197:stop] 0/5906, RunningAvgSamplesPerSec=6.333035290080761, CurrSamplesPerSec=5.608293863527618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:15:40,572] [INFO] [timer.py:197:stop] 0/5908, RunningAvgSamplesPerSec=6.333040218050896, CurrSamplesPerSec=5.708095102165659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:15:51,848] [INFO] [timer.py:197:stop] 0/5910, RunningAvgSamplesPerSec=6.333052727101973, CurrSamplesPerSec=5.736807960714468, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:16:03,374] [INFO] [timer.py:197:stop] 0/5912, RunningAvgSamplesPerSec=6.333008477793644, CurrSamplesPerSec=5.4947222221881065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:16:14,794] [INFO] [timer.py:197:stop] 0/5914, RunningAvgSamplesPerSec=6.333011937522531, CurrSamplesPerSec=5.707201651961366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:16:26,254] [INFO] [timer.py:197:stop] 0/5916, RunningAvgSamplesPerSec=6.333016941064426, CurrSamplesPerSec=5.697815392920308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:16:37,755] [INFO] [timer.py:197:stop] 0/5918, RunningAvgSamplesPerSec=6.332988295785954, CurrSamplesPerSec=5.547189727597872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:16:49,165] [INFO] [logging.py:68:log_dist] [Rank 0] step=2960, skipped=5, lr=[4.546666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:16:49,166] [INFO] [timer.py:197:stop] 0/5920, RunningAvgSamplesPerSec=6.332992632489854, CurrSamplesPerSec=5.715456978790322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:17:00,543] [INFO] [timer.py:197:stop] 0/5922, RunningAvgSamplesPerSec=6.332990709493712, CurrSamplesPerSec=5.685131726842036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:17:11,794] [INFO] [timer.py:197:stop] 0/5924, RunningAvgSamplesPerSec=6.332998285720652, CurrSamplesPerSec=5.722485780413905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:17:23,464] [INFO] [timer.py:197:stop] 0/5926, RunningAvgSamplesPerSec=6.332999240441912, CurrSamplesPerSec=5.710772029286689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:17:34,840] [INFO] [timer.py:197:stop] 0/5928, RunningAvgSamplesPerSec=6.33299967952669, CurrSamplesPerSec=5.670417519968683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:17:46,128] [INFO] [timer.py:197:stop] 0/5930, RunningAvgSamplesPerSec=6.333002653796711, CurrSamplesPerSec=5.716089116964696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:17:57,453] [INFO] [timer.py:197:stop] 0/5932, RunningAvgSamplesPerSec=6.33300097953948, CurrSamplesPerSec=5.719599697949569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:18:08,773] [INFO] [timer.py:197:stop] 0/5934, RunningAvgSamplesPerSec=6.333000297221696, CurrSamplesPerSec=5.683653789922553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:18:20,073] [INFO] [timer.py:197:stop] 0/5936, RunningAvgSamplesPerSec=6.3330006554919525, CurrSamplesPerSec=5.685189521385519, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:18:31,402] [INFO] [timer.py:197:stop] 0/5938, RunningAvgSamplesPerSec=6.332996660401324, CurrSamplesPerSec=5.696287580558719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:18:42,786] [INFO] [logging.py:68:log_dist] [Rank 0] step=2970, skipped=5, lr=[4.524444444444444e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:18:42,787] [INFO] [timer.py:197:stop] 0/5940, RunningAvgSamplesPerSec=6.332994887762245, CurrSamplesPerSec=5.695508030136036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:18:54,075] [INFO] [timer.py:197:stop] 0/5942, RunningAvgSamplesPerSec=6.333001122764453, CurrSamplesPerSec=5.720114516738884, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:19:05,677] [INFO] [timer.py:197:stop] 0/5944, RunningAvgSamplesPerSec=6.333004765281942, CurrSamplesPerSec=5.715796276237501, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:19:17,231] [INFO] [timer.py:197:stop] 0/5946, RunningAvgSamplesPerSec=6.333007590785375, CurrSamplesPerSec=5.718903913586931, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:19:28,821] [INFO] [timer.py:197:stop] 0/5948, RunningAvgSamplesPerSec=6.332955003707633, CurrSamplesPerSec=5.437463012309892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:19:40,183] [INFO] [timer.py:197:stop] 0/5950, RunningAvgSamplesPerSec=6.332957337549395, CurrSamplesPerSec=5.6944965039264135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0012, 'learning_rate': 4.513333333333333e-06, 'epoch': 12.61} +[2022-12-17 06:19:51,474] [INFO] [timer.py:197:stop] 0/5952, RunningAvgSamplesPerSec=6.332965662367068, CurrSamplesPerSec=5.721009088510862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:20:02,822] [INFO] [timer.py:197:stop] 0/5954, RunningAvgSamplesPerSec=6.332961677367649, CurrSamplesPerSec=5.668551218034829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:20:14,160] [INFO] [timer.py:197:stop] 0/5956, RunningAvgSamplesPerSec=6.332959969392417, CurrSamplesPerSec=5.677735067371684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:20:25,505] [INFO] [timer.py:197:stop] 0/5958, RunningAvgSamplesPerSec=6.332957888895088, CurrSamplesPerSec=5.671679580428901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:20:36,919] [INFO] [logging.py:68:log_dist] [Rank 0] step=2980, skipped=5, lr=[4.502222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:20:36,920] [INFO] [timer.py:197:stop] 0/5960, RunningAvgSamplesPerSec=6.3329590493220325, CurrSamplesPerSec=5.697653335352861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:20:48,227] [INFO] [timer.py:197:stop] 0/5962, RunningAvgSamplesPerSec=6.332961638674421, CurrSamplesPerSec=5.70620222218292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:20:59,636] [INFO] [timer.py:197:stop] 0/5964, RunningAvgSamplesPerSec=6.332966611609955, CurrSamplesPerSec=5.718364949399794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:21:11,169] [INFO] [timer.py:197:stop] 0/5966, RunningAvgSamplesPerSec=6.332969463847246, CurrSamplesPerSec=5.718971413095601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:21:22,408] [INFO] [timer.py:197:stop] 0/5968, RunningAvgSamplesPerSec=6.332982594177314, CurrSamplesPerSec=5.7341594900877935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:21:33,667] [INFO] [timer.py:197:stop] 0/5970, RunningAvgSamplesPerSec=6.3329947216668545, CurrSamplesPerSec=5.718728958379074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:21:45,273] [INFO] [timer.py:197:stop] 0/5972, RunningAvgSamplesPerSec=6.332985761956139, CurrSamplesPerSec=5.712217914034815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:21:56,574] [INFO] [timer.py:197:stop] 0/5974, RunningAvgSamplesPerSec=6.332989670456562, CurrSamplesPerSec=5.711422331639339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:22:08,107] [INFO] [timer.py:197:stop] 0/5976, RunningAvgSamplesPerSec=6.332995013267368, CurrSamplesPerSec=5.723771859777583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:22:19,699] [INFO] [timer.py:197:stop] 0/5978, RunningAvgSamplesPerSec=6.332998115014715, CurrSamplesPerSec=5.734231759936096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:22:31,011] [INFO] [logging.py:68:log_dist] [Rank 0] step=2990, skipped=5, lr=[4.48e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:22:31,012] [INFO] [timer.py:197:stop] 0/5980, RunningAvgSamplesPerSec=6.332999323998145, CurrSamplesPerSec=5.665226150268889, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:22:42,389] [INFO] [timer.py:197:stop] 0/5982, RunningAvgSamplesPerSec=6.333005629850955, CurrSamplesPerSec=5.700576867200941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:22:53,952] [INFO] [timer.py:197:stop] 0/5984, RunningAvgSamplesPerSec=6.33301324936841, CurrSamplesPerSec=5.732907918338295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:23:05,236] [INFO] [timer.py:197:stop] 0/5986, RunningAvgSamplesPerSec=6.333020150963738, CurrSamplesPerSec=5.709220747511372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:23:16,563] [INFO] [timer.py:197:stop] 0/5988, RunningAvgSamplesPerSec=6.333027199070639, CurrSamplesPerSec=5.72047850468883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:23:27,940] [INFO] [timer.py:197:stop] 0/5990, RunningAvgSamplesPerSec=6.333021411493446, CurrSamplesPerSec=5.706681147760475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:23:39,270] [INFO] [timer.py:197:stop] 0/5992, RunningAvgSamplesPerSec=6.333012549087907, CurrSamplesPerSec=5.6532462764159055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:23:50,584] [INFO] [timer.py:197:stop] 0/5994, RunningAvgSamplesPerSec=6.333012546323928, CurrSamplesPerSec=5.690703945040657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:24:02,141] [INFO] [timer.py:197:stop] 0/5996, RunningAvgSamplesPerSec=6.3329965499097245, CurrSamplesPerSec=5.633968527653969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:24:13,457] [INFO] [timer.py:197:stop] 0/5998, RunningAvgSamplesPerSec=6.332990861516932, CurrSamplesPerSec=5.682410453102639, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-17 06:24:24,871] [INFO] [logging.py:68:log_dist] [Rank 0] step=3000, skipped=5, lr=[4.457777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-17 06:24:24,873] [INFO] [timer.py:197:stop] 0/6000, RunningAvgSamplesPerSec=6.332989796557147, CurrSamplesPerSec=5.6977181572736075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0011, 'learning_rate': 4.457777777777778e-06, 'epoch': 12.71} +{'eval_loss': 0.191650390625, 'eval_wer': 9.403141746929155, 'eval_runtime': 2117.549, 'eval_samples_per_second': 3.643, 'eval_steps_per_second': 0.456, 'epoch': 12.71} +[2022-12-17 06:59:46,120] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step3000 is begin to save! +[2022-12-17 06:59:46,130] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt +[2022-12-17 06:59:46,130] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt... +[2022-12-17 06:59:49,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt. +[2022-12-17 06:59:49,706] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-17 07:00:04,837] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-17 07:00:04,837] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-17 07:00:04,837] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now!