{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.99988422039805, "eval_steps": 500, "global_step": 431850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.0003, "loss": 3.5282, "step": 2000 }, { "epoch": 0.09, "learning_rate": 0.00029860416424334066, "loss": 1.3191, "step": 4000 }, { "epoch": 0.14, "learning_rate": 0.0002972083284866814, "loss": 1.2226, "step": 6000 }, { "epoch": 0.19, "learning_rate": 0.00029581249273002205, "loss": 1.1832, "step": 8000 }, { "epoch": 0.23, "learning_rate": 0.0002944166569733628, "loss": 1.1595, "step": 10000 }, { "epoch": 0.28, "learning_rate": 0.0002930208212167035, "loss": 1.1368, "step": 12000 }, { "epoch": 0.32, "learning_rate": 0.00029162498546004417, "loss": 1.1176, "step": 14000 }, { "epoch": 0.37, "learning_rate": 0.00029022914970338486, "loss": 1.1044, "step": 16000 }, { "epoch": 0.42, "learning_rate": 0.00028883331394672555, "loss": 1.0954, "step": 18000 }, { "epoch": 0.46, "learning_rate": 0.0002874374781900663, "loss": 1.088, "step": 20000 }, { "epoch": 0.51, "learning_rate": 0.000286041642433407, "loss": 1.0765, "step": 22000 }, { "epoch": 0.56, "learning_rate": 0.0002846458066767477, "loss": 1.069, "step": 24000 }, { "epoch": 0.6, "learning_rate": 0.00028324997092008837, "loss": 1.0623, "step": 26000 }, { "epoch": 0.65, "learning_rate": 0.00028185413516342906, "loss": 1.0653, "step": 28000 }, { "epoch": 0.69, "learning_rate": 0.0002804582994067698, "loss": 1.0467, "step": 30000 }, { "epoch": 0.74, "learning_rate": 0.00027906246365011044, "loss": 1.0395, "step": 32000 }, { "epoch": 0.79, "learning_rate": 0.0002776666278934512, "loss": 1.0419, "step": 34000 }, { "epoch": 0.83, "learning_rate": 0.0002762707921367919, "loss": 1.0385, "step": 36000 }, { "epoch": 0.88, "learning_rate": 0.00027487495638013257, "loss": 1.0375, "step": 38000 }, { "epoch": 0.93, "learning_rate": 0.00027347912062347326, "loss": 1.0211, "step": 40000 }, { "epoch": 0.97, "learning_rate": 0.00027208328486681395, "loss": 1.0248, "step": 42000 }, { "epoch": 1.02, "learning_rate": 0.0002706874491101547, "loss": 1.0071, "step": 44000 }, { "epoch": 1.07, "learning_rate": 0.0002692916133534954, "loss": 0.9825, "step": 46000 }, { "epoch": 1.11, "learning_rate": 0.0002678957775968361, "loss": 0.983, "step": 48000 }, { "epoch": 1.16, "learning_rate": 0.00026649994184017677, "loss": 0.9831, "step": 50000 }, { "epoch": 1.2, "learning_rate": 0.00026510410608351746, "loss": 0.9792, "step": 52000 }, { "epoch": 1.25, "learning_rate": 0.0002637082703268582, "loss": 0.9744, "step": 54000 }, { "epoch": 1.3, "learning_rate": 0.0002623124345701989, "loss": 0.9668, "step": 56000 }, { "epoch": 1.34, "learning_rate": 0.0002609165988135396, "loss": 0.971, "step": 58000 }, { "epoch": 1.39, "learning_rate": 0.0002595207630568803, "loss": 0.9789, "step": 60000 }, { "epoch": 1.44, "learning_rate": 0.000258124927300221, "loss": 0.9794, "step": 62000 }, { "epoch": 1.48, "learning_rate": 0.00025672909154356166, "loss": 0.9615, "step": 64000 }, { "epoch": 1.53, "learning_rate": 0.0002553332557869024, "loss": 0.9577, "step": 66000 }, { "epoch": 1.57, "learning_rate": 0.0002539374200302431, "loss": 0.9573, "step": 68000 }, { "epoch": 1.62, "learning_rate": 0.0002525415842735838, "loss": 0.9633, "step": 70000 }, { "epoch": 1.67, "learning_rate": 0.0002511457485169245, "loss": 0.946, "step": 72000 }, { "epoch": 1.71, "learning_rate": 0.00024974991276026517, "loss": 0.9528, "step": 74000 }, { "epoch": 1.76, "learning_rate": 0.0002483540770036059, "loss": 0.9474, "step": 76000 }, { "epoch": 1.81, "learning_rate": 0.0002469582412469466, "loss": 0.9431, "step": 78000 }, { "epoch": 1.85, "learning_rate": 0.0002455624054902873, "loss": 0.9469, "step": 80000 }, { "epoch": 1.9, "learning_rate": 0.000244166569733628, "loss": 0.9442, "step": 82000 }, { "epoch": 1.95, "learning_rate": 0.0002427707339769687, "loss": 0.9387, "step": 84000 }, { "epoch": 1.99, "learning_rate": 0.0002413748982203094, "loss": 0.9341, "step": 86000 }, { "epoch": 2.04, "learning_rate": 0.00023997906246365008, "loss": 0.9124, "step": 88000 }, { "epoch": 2.08, "learning_rate": 0.00023858322670699077, "loss": 0.8962, "step": 90000 }, { "epoch": 2.13, "learning_rate": 0.0002371873909503315, "loss": 0.8974, "step": 92000 }, { "epoch": 2.18, "learning_rate": 0.0002357915551936722, "loss": 0.9122, "step": 94000 }, { "epoch": 2.22, "learning_rate": 0.00023439571943701287, "loss": 0.8977, "step": 96000 }, { "epoch": 2.27, "learning_rate": 0.0002329998836803536, "loss": 0.8972, "step": 98000 }, { "epoch": 2.32, "learning_rate": 0.00023160404792369428, "loss": 0.8969, "step": 100000 }, { "epoch": 2.36, "learning_rate": 0.000230208212167035, "loss": 0.9021, "step": 102000 }, { "epoch": 2.41, "learning_rate": 0.00022881237641037572, "loss": 0.9083, "step": 104000 }, { "epoch": 2.45, "learning_rate": 0.00022741654065371638, "loss": 0.8928, "step": 106000 }, { "epoch": 2.5, "learning_rate": 0.0002260207048970571, "loss": 0.8871, "step": 108000 }, { "epoch": 2.55, "learning_rate": 0.0002246248691403978, "loss": 0.9017, "step": 110000 }, { "epoch": 2.59, "learning_rate": 0.0002232290333837385, "loss": 0.8968, "step": 112000 }, { "epoch": 2.64, "learning_rate": 0.00022183319762707917, "loss": 0.8813, "step": 114000 }, { "epoch": 2.69, "learning_rate": 0.0002204373618704199, "loss": 0.8807, "step": 116000 }, { "epoch": 2.73, "learning_rate": 0.0002190415261137606, "loss": 0.8816, "step": 118000 }, { "epoch": 2.78, "learning_rate": 0.0002176456903571013, "loss": 0.8776, "step": 120000 }, { "epoch": 2.83, "learning_rate": 0.000216249854600442, "loss": 0.8738, "step": 122000 }, { "epoch": 2.87, "learning_rate": 0.00021485401884378268, "loss": 0.8826, "step": 124000 }, { "epoch": 2.92, "learning_rate": 0.0002134581830871234, "loss": 0.893, "step": 126000 }, { "epoch": 2.96, "learning_rate": 0.00021206234733046412, "loss": 0.8753, "step": 128000 }, { "epoch": 3.01, "learning_rate": 0.00021066651157380478, "loss": 0.8671, "step": 130000 }, { "epoch": 3.06, "learning_rate": 0.0002092706758171455, "loss": 0.8497, "step": 132000 }, { "epoch": 3.1, "learning_rate": 0.00020787484006048622, "loss": 0.8443, "step": 134000 }, { "epoch": 3.15, "learning_rate": 0.0002064790043038269, "loss": 0.8477, "step": 136000 }, { "epoch": 3.2, "learning_rate": 0.0002050831685471676, "loss": 0.8465, "step": 138000 }, { "epoch": 3.24, "learning_rate": 0.0002036873327905083, "loss": 0.8501, "step": 140000 }, { "epoch": 3.29, "learning_rate": 0.000202291497033849, "loss": 0.8451, "step": 142000 }, { "epoch": 3.33, "learning_rate": 0.00020089566127718972, "loss": 0.84, "step": 144000 }, { "epoch": 3.38, "learning_rate": 0.0001994998255205304, "loss": 0.8482, "step": 146000 }, { "epoch": 3.43, "learning_rate": 0.0001981039897638711, "loss": 0.8383, "step": 148000 }, { "epoch": 3.47, "learning_rate": 0.0001967081540072118, "loss": 0.8353, "step": 150000 }, { "epoch": 3.52, "learning_rate": 0.00019531231825055251, "loss": 0.8334, "step": 152000 }, { "epoch": 3.57, "learning_rate": 0.00019391648249389318, "loss": 0.8446, "step": 154000 }, { "epoch": 3.61, "learning_rate": 0.0001925206467372339, "loss": 0.8373, "step": 156000 }, { "epoch": 3.66, "learning_rate": 0.00019112481098057461, "loss": 0.8412, "step": 158000 }, { "epoch": 3.7, "learning_rate": 0.0001897289752239153, "loss": 0.8286, "step": 160000 }, { "epoch": 3.75, "learning_rate": 0.000188333139467256, "loss": 0.8327, "step": 162000 }, { "epoch": 3.8, "learning_rate": 0.0001869373037105967, "loss": 0.8426, "step": 164000 }, { "epoch": 3.84, "learning_rate": 0.0001855414679539374, "loss": 0.8291, "step": 166000 }, { "epoch": 3.89, "learning_rate": 0.00018414563219727812, "loss": 0.8271, "step": 168000 }, { "epoch": 3.94, "learning_rate": 0.00018274979644061879, "loss": 0.8278, "step": 170000 }, { "epoch": 3.98, "learning_rate": 0.0001813539606839595, "loss": 0.8307, "step": 172000 }, { "epoch": 4.03, "learning_rate": 0.0001799581249273002, "loss": 0.8109, "step": 174000 }, { "epoch": 4.08, "learning_rate": 0.0001785622891706409, "loss": 0.8027, "step": 176000 }, { "epoch": 4.12, "learning_rate": 0.0001771664534139816, "loss": 0.7955, "step": 178000 }, { "epoch": 4.17, "learning_rate": 0.0001757706176573223, "loss": 0.7888, "step": 180000 }, { "epoch": 4.21, "learning_rate": 0.000174374781900663, "loss": 0.799, "step": 182000 }, { "epoch": 4.26, "learning_rate": 0.0001729789461440037, "loss": 0.7949, "step": 184000 }, { "epoch": 4.31, "learning_rate": 0.0001715831103873444, "loss": 0.7913, "step": 186000 }, { "epoch": 4.35, "learning_rate": 0.0001701872746306851, "loss": 0.7862, "step": 188000 }, { "epoch": 4.4, "learning_rate": 0.0001687914388740258, "loss": 0.7882, "step": 190000 }, { "epoch": 4.45, "learning_rate": 0.00016739560311736652, "loss": 0.8017, "step": 192000 }, { "epoch": 4.49, "learning_rate": 0.00016599976736070724, "loss": 0.787, "step": 194000 }, { "epoch": 4.54, "learning_rate": 0.0001646039316040479, "loss": 0.7975, "step": 196000 }, { "epoch": 4.58, "learning_rate": 0.00016320809584738862, "loss": 0.7938, "step": 198000 }, { "epoch": 4.63, "learning_rate": 0.0001618122600907293, "loss": 0.799, "step": 200000 }, { "epoch": 4.68, "learning_rate": 0.00016041642433407003, "loss": 0.7811, "step": 202000 }, { "epoch": 4.72, "learning_rate": 0.0001590205885774107, "loss": 0.7941, "step": 204000 }, { "epoch": 4.77, "learning_rate": 0.0001576247528207514, "loss": 0.7918, "step": 206000 }, { "epoch": 4.82, "learning_rate": 0.00015622891706409213, "loss": 0.8006, "step": 208000 }, { "epoch": 4.86, "learning_rate": 0.00015483308130743282, "loss": 0.7896, "step": 210000 }, { "epoch": 4.91, "learning_rate": 0.0001534372455507735, "loss": 0.7944, "step": 212000 }, { "epoch": 4.96, "learning_rate": 0.0001520414097941142, "loss": 0.7869, "step": 214000 }, { "epoch": 5.0, "learning_rate": 0.00015064557403745492, "loss": 0.7847, "step": 216000 }, { "epoch": 5.05, "learning_rate": 0.0001492497382807956, "loss": 0.7577, "step": 218000 }, { "epoch": 5.09, "learning_rate": 0.00014785390252413633, "loss": 0.7527, "step": 220000 }, { "epoch": 5.14, "learning_rate": 0.00014645806676747702, "loss": 0.7554, "step": 222000 }, { "epoch": 5.19, "learning_rate": 0.0001450622310108177, "loss": 0.7633, "step": 224000 }, { "epoch": 5.23, "learning_rate": 0.0001436663952541584, "loss": 0.754, "step": 226000 }, { "epoch": 5.28, "learning_rate": 0.00014227055949749912, "loss": 0.7438, "step": 228000 }, { "epoch": 5.33, "learning_rate": 0.0001408747237408398, "loss": 0.7514, "step": 230000 }, { "epoch": 5.37, "learning_rate": 0.00013947888798418053, "loss": 0.7522, "step": 232000 }, { "epoch": 5.42, "learning_rate": 0.00013808305222752122, "loss": 0.7525, "step": 234000 }, { "epoch": 5.46, "learning_rate": 0.00013668721647086193, "loss": 0.7476, "step": 236000 }, { "epoch": 5.51, "learning_rate": 0.00013529138071420263, "loss": 0.747, "step": 238000 }, { "epoch": 5.56, "learning_rate": 0.00013389554495754332, "loss": 0.7505, "step": 240000 }, { "epoch": 5.6, "learning_rate": 0.000132499709200884, "loss": 0.7501, "step": 242000 }, { "epoch": 5.65, "learning_rate": 0.00013110387344422472, "loss": 0.7568, "step": 244000 }, { "epoch": 5.7, "learning_rate": 0.00012970803768756542, "loss": 0.7383, "step": 246000 }, { "epoch": 5.74, "learning_rate": 0.00012831220193090613, "loss": 0.7535, "step": 248000 }, { "epoch": 5.79, "learning_rate": 0.00012691636617424682, "loss": 0.751, "step": 250000 }, { "epoch": 5.84, "learning_rate": 0.00012552053041758751, "loss": 0.7396, "step": 252000 }, { "epoch": 5.88, "learning_rate": 0.0001241246946609282, "loss": 0.7448, "step": 254000 }, { "epoch": 5.93, "learning_rate": 0.00012272885890426892, "loss": 0.7521, "step": 256000 }, { "epoch": 5.97, "learning_rate": 0.00012133302314760961, "loss": 0.7472, "step": 258000 }, { "epoch": 6.02, "learning_rate": 0.00011993718739095032, "loss": 0.7344, "step": 260000 }, { "epoch": 6.07, "learning_rate": 0.00011854135163429101, "loss": 0.7143, "step": 262000 }, { "epoch": 6.11, "learning_rate": 0.00011714551587763173, "loss": 0.717, "step": 264000 }, { "epoch": 6.16, "learning_rate": 0.00011574968012097242, "loss": 0.7194, "step": 266000 }, { "epoch": 6.21, "learning_rate": 0.00011435384436431312, "loss": 0.7206, "step": 268000 }, { "epoch": 6.25, "learning_rate": 0.00011295800860765381, "loss": 0.7148, "step": 270000 }, { "epoch": 6.3, "learning_rate": 0.00011156217285099453, "loss": 0.7137, "step": 272000 }, { "epoch": 6.34, "learning_rate": 0.00011016633709433522, "loss": 0.7168, "step": 274000 }, { "epoch": 6.39, "learning_rate": 0.00010877050133767593, "loss": 0.7097, "step": 276000 }, { "epoch": 6.44, "learning_rate": 0.00010737466558101662, "loss": 0.7084, "step": 278000 }, { "epoch": 6.48, "learning_rate": 0.00010597882982435732, "loss": 0.7101, "step": 280000 }, { "epoch": 6.53, "learning_rate": 0.00010458299406769801, "loss": 0.7117, "step": 282000 }, { "epoch": 6.58, "learning_rate": 0.00010318715831103873, "loss": 0.7067, "step": 284000 }, { "epoch": 6.62, "learning_rate": 0.00010179132255437942, "loss": 0.7026, "step": 286000 }, { "epoch": 6.67, "learning_rate": 0.00010039548679772013, "loss": 0.7144, "step": 288000 }, { "epoch": 6.72, "learning_rate": 9.899965104106083e-05, "loss": 0.7118, "step": 290000 }, { "epoch": 6.76, "learning_rate": 9.760381528440152e-05, "loss": 0.7028, "step": 292000 }, { "epoch": 6.81, "learning_rate": 9.620797952774224e-05, "loss": 0.7044, "step": 294000 }, { "epoch": 6.85, "learning_rate": 9.481214377108293e-05, "loss": 0.704, "step": 296000 }, { "epoch": 6.9, "learning_rate": 9.341630801442363e-05, "loss": 0.7049, "step": 298000 }, { "epoch": 6.95, "learning_rate": 9.202047225776432e-05, "loss": 0.6937, "step": 300000 }, { "epoch": 6.99, "learning_rate": 9.062463650110504e-05, "loss": 0.7096, "step": 302000 }, { "epoch": 7.04, "learning_rate": 8.922880074444573e-05, "loss": 0.6749, "step": 304000 }, { "epoch": 7.09, "learning_rate": 8.783296498778644e-05, "loss": 0.6718, "step": 306000 }, { "epoch": 7.13, "learning_rate": 8.643712923112713e-05, "loss": 0.6789, "step": 308000 }, { "epoch": 7.18, "learning_rate": 8.504129347446783e-05, "loss": 0.6805, "step": 310000 }, { "epoch": 7.22, "learning_rate": 8.364545771780852e-05, "loss": 0.6776, "step": 312000 }, { "epoch": 7.27, "learning_rate": 8.224962196114924e-05, "loss": 0.6734, "step": 314000 }, { "epoch": 7.32, "learning_rate": 8.085378620448993e-05, "loss": 0.6765, "step": 316000 }, { "epoch": 7.36, "learning_rate": 7.945795044783064e-05, "loss": 0.6709, "step": 318000 }, { "epoch": 7.41, "learning_rate": 7.806211469117133e-05, "loss": 0.6804, "step": 320000 }, { "epoch": 7.46, "learning_rate": 7.666627893451203e-05, "loss": 0.6688, "step": 322000 }, { "epoch": 7.5, "learning_rate": 7.527044317785274e-05, "loss": 0.6789, "step": 324000 }, { "epoch": 7.55, "learning_rate": 7.387460742119343e-05, "loss": 0.6699, "step": 326000 }, { "epoch": 7.6, "learning_rate": 7.247877166453413e-05, "loss": 0.662, "step": 328000 }, { "epoch": 7.64, "learning_rate": 7.108293590787484e-05, "loss": 0.6657, "step": 330000 }, { "epoch": 7.69, "learning_rate": 6.968710015121553e-05, "loss": 0.6676, "step": 332000 }, { "epoch": 7.73, "learning_rate": 6.829126439455623e-05, "loss": 0.6698, "step": 334000 }, { "epoch": 7.78, "learning_rate": 6.689542863789693e-05, "loss": 0.6755, "step": 336000 }, { "epoch": 7.83, "learning_rate": 6.549959288123764e-05, "loss": 0.672, "step": 338000 }, { "epoch": 7.87, "learning_rate": 6.410375712457834e-05, "loss": 0.6616, "step": 340000 }, { "epoch": 7.92, "learning_rate": 6.270792136791903e-05, "loss": 0.6654, "step": 342000 }, { "epoch": 7.97, "learning_rate": 6.131208561125974e-05, "loss": 0.6606, "step": 344000 }, { "epoch": 8.01, "learning_rate": 5.991624985460044e-05, "loss": 0.6642, "step": 346000 }, { "epoch": 8.06, "learning_rate": 5.852041409794114e-05, "loss": 0.639, "step": 348000 }, { "epoch": 8.1, "learning_rate": 5.712457834128184e-05, "loss": 0.6463, "step": 350000 }, { "epoch": 8.15, "learning_rate": 5.572874258462254e-05, "loss": 0.6426, "step": 352000 }, { "epoch": 8.2, "learning_rate": 5.433290682796324e-05, "loss": 0.6356, "step": 354000 }, { "epoch": 8.24, "learning_rate": 5.2937071071303944e-05, "loss": 0.6336, "step": 356000 }, { "epoch": 8.29, "learning_rate": 5.154123531464464e-05, "loss": 0.631, "step": 358000 }, { "epoch": 8.34, "learning_rate": 5.014539955798534e-05, "loss": 0.6425, "step": 360000 }, { "epoch": 8.38, "learning_rate": 4.8749563801326044e-05, "loss": 0.6432, "step": 362000 }, { "epoch": 8.43, "learning_rate": 4.735372804466674e-05, "loss": 0.6449, "step": 364000 }, { "epoch": 8.48, "learning_rate": 4.595789228800744e-05, "loss": 0.635, "step": 366000 }, { "epoch": 8.52, "learning_rate": 4.456205653134814e-05, "loss": 0.6352, "step": 368000 }, { "epoch": 8.57, "learning_rate": 4.316622077468884e-05, "loss": 0.6281, "step": 370000 }, { "epoch": 8.61, "learning_rate": 4.1770385018029545e-05, "loss": 0.6291, "step": 372000 }, { "epoch": 8.66, "learning_rate": 4.037454926137024e-05, "loss": 0.6317, "step": 374000 }, { "epoch": 8.71, "learning_rate": 3.897871350471094e-05, "loss": 0.6296, "step": 376000 }, { "epoch": 8.75, "learning_rate": 3.7582877748051644e-05, "loss": 0.6279, "step": 378000 }, { "epoch": 8.8, "learning_rate": 3.618704199139234e-05, "loss": 0.6379, "step": 380000 }, { "epoch": 8.85, "learning_rate": 3.4791206234733046e-05, "loss": 0.6292, "step": 382000 }, { "epoch": 8.89, "learning_rate": 3.3395370478073744e-05, "loss": 0.6248, "step": 384000 }, { "epoch": 8.94, "learning_rate": 3.199953472141444e-05, "loss": 0.6199, "step": 386000 }, { "epoch": 8.98, "learning_rate": 3.0603698964755146e-05, "loss": 0.6173, "step": 388000 }, { "epoch": 9.03, "learning_rate": 2.9207863208095843e-05, "loss": 0.6129, "step": 390000 }, { "epoch": 9.08, "learning_rate": 2.7812027451436544e-05, "loss": 0.6013, "step": 392000 }, { "epoch": 9.12, "learning_rate": 2.6416191694777245e-05, "loss": 0.6045, "step": 394000 }, { "epoch": 9.17, "learning_rate": 2.5020355938117946e-05, "loss": 0.6134, "step": 396000 }, { "epoch": 9.22, "learning_rate": 2.3624520181458644e-05, "loss": 0.6056, "step": 398000 }, { "epoch": 9.26, "learning_rate": 2.2228684424799345e-05, "loss": 0.6112, "step": 400000 }, { "epoch": 9.31, "learning_rate": 2.0832848668140046e-05, "loss": 0.6086, "step": 402000 }, { "epoch": 9.35, "learning_rate": 1.9437012911480747e-05, "loss": 0.6079, "step": 404000 }, { "epoch": 9.4, "learning_rate": 1.8041177154821448e-05, "loss": 0.6012, "step": 406000 }, { "epoch": 9.45, "learning_rate": 1.664534139816215e-05, "loss": 0.6029, "step": 408000 }, { "epoch": 9.49, "learning_rate": 1.5249505641502848e-05, "loss": 0.5948, "step": 410000 }, { "epoch": 9.54, "learning_rate": 1.3853669884843549e-05, "loss": 0.6018, "step": 412000 }, { "epoch": 9.59, "learning_rate": 1.2457834128184248e-05, "loss": 0.5983, "step": 414000 }, { "epoch": 9.63, "learning_rate": 1.1061998371524949e-05, "loss": 0.6046, "step": 416000 }, { "epoch": 9.68, "learning_rate": 9.666162614865652e-06, "loss": 0.5971, "step": 418000 }, { "epoch": 9.73, "learning_rate": 8.27032685820635e-06, "loss": 0.6002, "step": 420000 }, { "epoch": 9.77, "learning_rate": 6.87449110154705e-06, "loss": 0.598, "step": 422000 }, { "epoch": 9.82, "learning_rate": 5.478655344887752e-06, "loss": 0.6042, "step": 424000 }, { "epoch": 9.86, "learning_rate": 4.082819588228451e-06, "loss": 0.5917, "step": 426000 }, { "epoch": 9.91, "learning_rate": 2.686983831569152e-06, "loss": 0.5985, "step": 428000 }, { "epoch": 9.96, "learning_rate": 1.2911480749098522e-06, "loss": 0.6036, "step": 430000 }, { "epoch": 10.0, "step": 431850, "total_flos": 8.831009191456261e+20, "train_loss": 0.806209775733219, "train_runtime": 212917.0057, "train_samples_per_second": 32.452, "train_steps_per_second": 2.028 } ], "logging_steps": 2000, "max_steps": 431850, "num_train_epochs": 10, "save_steps": 500, "total_flos": 8.831009191456261e+20, "trial_name": null, "trial_params": null }