|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.99988422039805,
  "eval_steps": 500,
  "global_step": 431850,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 0.0003,
      "loss": 3.5282,
      "step": 2000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00029860416424334066,
      "loss": 1.3191,
      "step": 4000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002972083284866814,
      "loss": 1.2226,
      "step": 6000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00029581249273002205,
      "loss": 1.1832,
      "step": 8000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002944166569733628,
      "loss": 1.1595,
      "step": 10000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002930208212167035,
      "loss": 1.1368,
      "step": 12000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00029162498546004417,
      "loss": 1.1176,
      "step": 14000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00029022914970338486,
      "loss": 1.1044,
      "step": 16000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00028883331394672555,
      "loss": 1.0954,
      "step": 18000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002874374781900663,
      "loss": 1.088,
      "step": 20000
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.000286041642433407,
      "loss": 1.0765,
      "step": 22000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0002846458066767477,
      "loss": 1.069,
      "step": 24000
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00028324997092008837,
      "loss": 1.0623,
      "step": 26000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00028185413516342906,
      "loss": 1.0653,
      "step": 28000
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.0002804582994067698,
      "loss": 1.0467,
      "step": 30000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00027906246365011044,
      "loss": 1.0395,
      "step": 32000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0002776666278934512,
      "loss": 1.0419,
      "step": 34000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0002762707921367919,
      "loss": 1.0385,
      "step": 36000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.00027487495638013257,
      "loss": 1.0375,
      "step": 38000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00027347912062347326,
      "loss": 1.0211,
      "step": 40000
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.00027208328486681395,
      "loss": 1.0248,
      "step": 42000
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002706874491101547,
      "loss": 1.0071,
      "step": 44000
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0002692916133534954,
      "loss": 0.9825,
      "step": 46000
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.0002678957775968361,
      "loss": 0.983,
      "step": 48000
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.00026649994184017677,
      "loss": 0.9831,
      "step": 50000
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.00026510410608351746,
      "loss": 0.9792,
      "step": 52000
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.0002637082703268582,
      "loss": 0.9744,
      "step": 54000
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.0002623124345701989,
      "loss": 0.9668,
      "step": 56000
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.0002609165988135396,
      "loss": 0.971,
      "step": 58000
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.0002595207630568803,
      "loss": 0.9789,
      "step": 60000
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.000258124927300221,
      "loss": 0.9794,
      "step": 62000
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.00025672909154356166,
      "loss": 0.9615,
      "step": 64000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002553332557869024,
      "loss": 0.9577,
      "step": 66000
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.0002539374200302431,
      "loss": 0.9573,
      "step": 68000
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.0002525415842735838,
      "loss": 0.9633,
      "step": 70000
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.0002511457485169245,
      "loss": 0.946,
      "step": 72000
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.00024974991276026517,
      "loss": 0.9528,
      "step": 74000
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.0002483540770036059,
      "loss": 0.9474,
      "step": 76000
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.0002469582412469466,
      "loss": 0.9431,
      "step": 78000
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0002455624054902873,
      "loss": 0.9469,
      "step": 80000
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.000244166569733628,
      "loss": 0.9442,
      "step": 82000
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0002427707339769687,
      "loss": 0.9387,
      "step": 84000
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.0002413748982203094,
      "loss": 0.9341,
      "step": 86000
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.00023997906246365008,
      "loss": 0.9124,
      "step": 88000
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00023858322670699077,
      "loss": 0.8962,
      "step": 90000
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.0002371873909503315,
      "loss": 0.8974,
      "step": 92000
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.0002357915551936722,
      "loss": 0.9122,
      "step": 94000
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.00023439571943701287,
      "loss": 0.8977,
      "step": 96000
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.0002329998836803536,
      "loss": 0.8972,
      "step": 98000
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.00023160404792369428,
      "loss": 0.8969,
      "step": 100000
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.000230208212167035,
      "loss": 0.9021,
      "step": 102000
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.00022881237641037572,
      "loss": 0.9083,
      "step": 104000
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.00022741654065371638,
      "loss": 0.8928,
      "step": 106000
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.0002260207048970571,
      "loss": 0.8871,
      "step": 108000
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0002246248691403978,
      "loss": 0.9017,
      "step": 110000
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.0002232290333837385,
      "loss": 0.8968,
      "step": 112000
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.00022183319762707917,
      "loss": 0.8813,
      "step": 114000
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.0002204373618704199,
      "loss": 0.8807,
      "step": 116000
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.0002190415261137606,
      "loss": 0.8816,
      "step": 118000
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.0002176456903571013,
      "loss": 0.8776,
      "step": 120000
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.000216249854600442,
      "loss": 0.8738,
      "step": 122000
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.00021485401884378268,
      "loss": 0.8826,
      "step": 124000
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.0002134581830871234,
      "loss": 0.893,
      "step": 126000
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.00021206234733046412,
      "loss": 0.8753,
      "step": 128000
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.00021066651157380478,
      "loss": 0.8671,
      "step": 130000
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.0002092706758171455,
      "loss": 0.8497,
      "step": 132000
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.00020787484006048622,
      "loss": 0.8443,
      "step": 134000
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.0002064790043038269,
      "loss": 0.8477,
      "step": 136000
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0002050831685471676,
      "loss": 0.8465,
      "step": 138000
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.0002036873327905083,
      "loss": 0.8501,
      "step": 140000
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.000202291497033849,
      "loss": 0.8451,
      "step": 142000
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.00020089566127718972,
      "loss": 0.84,
      "step": 144000
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.0001994998255205304,
      "loss": 0.8482,
      "step": 146000
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.0001981039897638711,
      "loss": 0.8383,
      "step": 148000
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.0001967081540072118,
      "loss": 0.8353,
      "step": 150000
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.00019531231825055251,
      "loss": 0.8334,
      "step": 152000
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.00019391648249389318,
      "loss": 0.8446,
      "step": 154000
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.0001925206467372339,
      "loss": 0.8373,
      "step": 156000
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.00019112481098057461,
      "loss": 0.8412,
      "step": 158000
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.0001897289752239153,
      "loss": 0.8286,
      "step": 160000
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.000188333139467256,
      "loss": 0.8327,
      "step": 162000
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.0001869373037105967,
      "loss": 0.8426,
      "step": 164000
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.0001855414679539374,
      "loss": 0.8291,
      "step": 166000
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.00018414563219727812,
      "loss": 0.8271,
      "step": 168000
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.00018274979644061879,
      "loss": 0.8278,
      "step": 170000
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.0001813539606839595,
      "loss": 0.8307,
      "step": 172000
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.0001799581249273002,
      "loss": 0.8109,
      "step": 174000
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.0001785622891706409,
      "loss": 0.8027,
      "step": 176000
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.0001771664534139816,
      "loss": 0.7955,
      "step": 178000
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.0001757706176573223,
      "loss": 0.7888,
      "step": 180000
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.000174374781900663,
      "loss": 0.799,
      "step": 182000
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.0001729789461440037,
      "loss": 0.7949,
      "step": 184000
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.0001715831103873444,
      "loss": 0.7913,
      "step": 186000
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.0001701872746306851,
      "loss": 0.7862,
      "step": 188000
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.0001687914388740258,
      "loss": 0.7882,
      "step": 190000
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.00016739560311736652,
      "loss": 0.8017,
      "step": 192000
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.00016599976736070724,
      "loss": 0.787,
      "step": 194000
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.0001646039316040479,
      "loss": 0.7975,
      "step": 196000
    },
    {
      "epoch": 4.58,
      "learning_rate": 0.00016320809584738862,
      "loss": 0.7938,
      "step": 198000
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.0001618122600907293,
      "loss": 0.799,
      "step": 200000
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.00016041642433407003,
      "loss": 0.7811,
      "step": 202000
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.0001590205885774107,
      "loss": 0.7941,
      "step": 204000
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.0001576247528207514,
      "loss": 0.7918,
      "step": 206000
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.00015622891706409213,
      "loss": 0.8006,
      "step": 208000
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.00015483308130743282,
      "loss": 0.7896,
      "step": 210000
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.0001534372455507735,
      "loss": 0.7944,
      "step": 212000
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.0001520414097941142,
      "loss": 0.7869,
      "step": 214000
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.00015064557403745492,
      "loss": 0.7847,
      "step": 216000
    },
    {
      "epoch": 5.05,
      "learning_rate": 0.0001492497382807956,
      "loss": 0.7577,
      "step": 218000
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.00014785390252413633,
      "loss": 0.7527,
      "step": 220000
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.00014645806676747702,
      "loss": 0.7554,
      "step": 222000
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.0001450622310108177,
      "loss": 0.7633,
      "step": 224000
    },
    {
      "epoch": 5.23,
      "learning_rate": 0.0001436663952541584,
      "loss": 0.754,
      "step": 226000
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.00014227055949749912,
      "loss": 0.7438,
      "step": 228000
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.0001408747237408398,
      "loss": 0.7514,
      "step": 230000
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.00013947888798418053,
      "loss": 0.7522,
      "step": 232000
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.00013808305222752122,
      "loss": 0.7525,
      "step": 234000
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.00013668721647086193,
      "loss": 0.7476,
      "step": 236000
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.00013529138071420263,
      "loss": 0.747,
      "step": 238000
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.00013389554495754332,
      "loss": 0.7505,
      "step": 240000
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.000132499709200884,
      "loss": 0.7501,
      "step": 242000
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.00013110387344422472,
      "loss": 0.7568,
      "step": 244000
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.00012970803768756542,
      "loss": 0.7383,
      "step": 246000
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.00012831220193090613,
      "loss": 0.7535,
      "step": 248000
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.00012691636617424682,
      "loss": 0.751,
      "step": 250000
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.00012552053041758751,
      "loss": 0.7396,
      "step": 252000
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.0001241246946609282,
      "loss": 0.7448,
      "step": 254000
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.00012272885890426892,
      "loss": 0.7521,
      "step": 256000
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.00012133302314760961,
      "loss": 0.7472,
      "step": 258000
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.00011993718739095032,
      "loss": 0.7344,
      "step": 260000
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.00011854135163429101,
      "loss": 0.7143,
      "step": 262000
    },
    {
      "epoch": 6.11,
      "learning_rate": 0.00011714551587763173,
      "loss": 0.717,
      "step": 264000
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.00011574968012097242,
      "loss": 0.7194,
      "step": 266000
    },
    {
      "epoch": 6.21,
      "learning_rate": 0.00011435384436431312,
      "loss": 0.7206,
      "step": 268000
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.00011295800860765381,
      "loss": 0.7148,
      "step": 270000
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.00011156217285099453,
      "loss": 0.7137,
      "step": 272000
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.00011016633709433522,
      "loss": 0.7168,
      "step": 274000
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.00010877050133767593,
      "loss": 0.7097,
      "step": 276000
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.00010737466558101662,
      "loss": 0.7084,
      "step": 278000
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.00010597882982435732,
      "loss": 0.7101,
      "step": 280000
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.00010458299406769801,
      "loss": 0.7117,
      "step": 282000
    },
    {
      "epoch": 6.58,
      "learning_rate": 0.00010318715831103873,
      "loss": 0.7067,
      "step": 284000
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.00010179132255437942,
      "loss": 0.7026,
      "step": 286000
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.00010039548679772013,
      "loss": 0.7144,
      "step": 288000
    },
    {
      "epoch": 6.72,
      "learning_rate": 9.899965104106083e-05,
      "loss": 0.7118,
      "step": 290000
    },
    {
      "epoch": 6.76,
      "learning_rate": 9.760381528440152e-05,
      "loss": 0.7028,
      "step": 292000
    },
    {
      "epoch": 6.81,
      "learning_rate": 9.620797952774224e-05,
      "loss": 0.7044,
      "step": 294000
    },
    {
      "epoch": 6.85,
      "learning_rate": 9.481214377108293e-05,
      "loss": 0.704,
      "step": 296000
    },
    {
      "epoch": 6.9,
      "learning_rate": 9.341630801442363e-05,
      "loss": 0.7049,
      "step": 298000
    },
    {
      "epoch": 6.95,
      "learning_rate": 9.202047225776432e-05,
      "loss": 0.6937,
      "step": 300000
    },
    {
      "epoch": 6.99,
      "learning_rate": 9.062463650110504e-05,
      "loss": 0.7096,
      "step": 302000
    },
    {
      "epoch": 7.04,
      "learning_rate": 8.922880074444573e-05,
      "loss": 0.6749,
      "step": 304000
    },
    {
      "epoch": 7.09,
      "learning_rate": 8.783296498778644e-05,
      "loss": 0.6718,
      "step": 306000
    },
    {
      "epoch": 7.13,
      "learning_rate": 8.643712923112713e-05,
      "loss": 0.6789,
      "step": 308000
    },
    {
      "epoch": 7.18,
      "learning_rate": 8.504129347446783e-05,
      "loss": 0.6805,
      "step": 310000
    },
    {
      "epoch": 7.22,
      "learning_rate": 8.364545771780852e-05,
      "loss": 0.6776,
      "step": 312000
    },
    {
      "epoch": 7.27,
      "learning_rate": 8.224962196114924e-05,
      "loss": 0.6734,
      "step": 314000
    },
    {
      "epoch": 7.32,
      "learning_rate": 8.085378620448993e-05,
      "loss": 0.6765,
      "step": 316000
    },
    {
      "epoch": 7.36,
      "learning_rate": 7.945795044783064e-05,
      "loss": 0.6709,
      "step": 318000
    },
    {
      "epoch": 7.41,
      "learning_rate": 7.806211469117133e-05,
      "loss": 0.6804,
      "step": 320000
    },
    {
      "epoch": 7.46,
      "learning_rate": 7.666627893451203e-05,
      "loss": 0.6688,
      "step": 322000
    },
    {
      "epoch": 7.5,
      "learning_rate": 7.527044317785274e-05,
      "loss": 0.6789,
      "step": 324000
    },
    {
      "epoch": 7.55,
      "learning_rate": 7.387460742119343e-05,
      "loss": 0.6699,
      "step": 326000
    },
    {
      "epoch": 7.6,
      "learning_rate": 7.247877166453413e-05,
      "loss": 0.662,
      "step": 328000
    },
    {
      "epoch": 7.64,
      "learning_rate": 7.108293590787484e-05,
      "loss": 0.6657,
      "step": 330000
    },
    {
      "epoch": 7.69,
      "learning_rate": 6.968710015121553e-05,
      "loss": 0.6676,
      "step": 332000
    },
    {
      "epoch": 7.73,
      "learning_rate": 6.829126439455623e-05,
      "loss": 0.6698,
      "step": 334000
    },
    {
      "epoch": 7.78,
      "learning_rate": 6.689542863789693e-05,
      "loss": 0.6755,
      "step": 336000
    },
    {
      "epoch": 7.83,
      "learning_rate": 6.549959288123764e-05,
      "loss": 0.672,
      "step": 338000
    },
    {
      "epoch": 7.87,
      "learning_rate": 6.410375712457834e-05,
      "loss": 0.6616,
      "step": 340000
    },
    {
      "epoch": 7.92,
      "learning_rate": 6.270792136791903e-05,
      "loss": 0.6654,
      "step": 342000
    },
    {
      "epoch": 7.97,
      "learning_rate": 6.131208561125974e-05,
      "loss": 0.6606,
      "step": 344000
    },
    {
      "epoch": 8.01,
      "learning_rate": 5.991624985460044e-05,
      "loss": 0.6642,
      "step": 346000
    },
    {
      "epoch": 8.06,
      "learning_rate": 5.852041409794114e-05,
      "loss": 0.639,
      "step": 348000
    },
    {
      "epoch": 8.1,
      "learning_rate": 5.712457834128184e-05,
      "loss": 0.6463,
      "step": 350000
    },
    {
      "epoch": 8.15,
      "learning_rate": 5.572874258462254e-05,
      "loss": 0.6426,
      "step": 352000
    },
    {
      "epoch": 8.2,
      "learning_rate": 5.433290682796324e-05,
      "loss": 0.6356,
      "step": 354000
    },
    {
      "epoch": 8.24,
      "learning_rate": 5.2937071071303944e-05,
      "loss": 0.6336,
      "step": 356000
    },
    {
      "epoch": 8.29,
      "learning_rate": 5.154123531464464e-05,
      "loss": 0.631,
      "step": 358000
    },
    {
      "epoch": 8.34,
      "learning_rate": 5.014539955798534e-05,
      "loss": 0.6425,
      "step": 360000
    },
    {
      "epoch": 8.38,
      "learning_rate": 4.8749563801326044e-05,
      "loss": 0.6432,
      "step": 362000
    },
    {
      "epoch": 8.43,
      "learning_rate": 4.735372804466674e-05,
      "loss": 0.6449,
      "step": 364000
    },
    {
      "epoch": 8.48,
      "learning_rate": 4.595789228800744e-05,
      "loss": 0.635,
      "step": 366000
    },
    {
      "epoch": 8.52,
      "learning_rate": 4.456205653134814e-05,
      "loss": 0.6352,
      "step": 368000
    },
    {
      "epoch": 8.57,
      "learning_rate": 4.316622077468884e-05,
      "loss": 0.6281,
      "step": 370000
    },
    {
      "epoch": 8.61,
      "learning_rate": 4.1770385018029545e-05,
      "loss": 0.6291,
      "step": 372000
    },
    {
      "epoch": 8.66,
      "learning_rate": 4.037454926137024e-05,
      "loss": 0.6317,
      "step": 374000
    },
    {
      "epoch": 8.71,
      "learning_rate": 3.897871350471094e-05,
      "loss": 0.6296,
      "step": 376000
    },
    {
      "epoch": 8.75,
      "learning_rate": 3.7582877748051644e-05,
      "loss": 0.6279,
      "step": 378000
    },
    {
      "epoch": 8.8,
      "learning_rate": 3.618704199139234e-05,
      "loss": 0.6379,
      "step": 380000
    },
    {
      "epoch": 8.85,
      "learning_rate": 3.4791206234733046e-05,
      "loss": 0.6292,
      "step": 382000
    },
    {
      "epoch": 8.89,
      "learning_rate": 3.3395370478073744e-05,
      "loss": 0.6248,
      "step": 384000
    },
    {
      "epoch": 8.94,
      "learning_rate": 3.199953472141444e-05,
      "loss": 0.6199,
      "step": 386000
    },
    {
      "epoch": 8.98,
      "learning_rate": 3.0603698964755146e-05,
      "loss": 0.6173,
      "step": 388000
    },
    {
      "epoch": 9.03,
      "learning_rate": 2.9207863208095843e-05,
      "loss": 0.6129,
      "step": 390000
    },
    {
      "epoch": 9.08,
      "learning_rate": 2.7812027451436544e-05,
      "loss": 0.6013,
      "step": 392000
    },
    {
      "epoch": 9.12,
      "learning_rate": 2.6416191694777245e-05,
      "loss": 0.6045,
      "step": 394000
    },
    {
      "epoch": 9.17,
      "learning_rate": 2.5020355938117946e-05,
      "loss": 0.6134,
      "step": 396000
    },
    {
      "epoch": 9.22,
      "learning_rate": 2.3624520181458644e-05,
      "loss": 0.6056,
      "step": 398000
    },
    {
      "epoch": 9.26,
      "learning_rate": 2.2228684424799345e-05,
      "loss": 0.6112,
      "step": 400000
    },
    {
      "epoch": 9.31,
      "learning_rate": 2.0832848668140046e-05,
      "loss": 0.6086,
      "step": 402000
    },
    {
      "epoch": 9.35,
      "learning_rate": 1.9437012911480747e-05,
      "loss": 0.6079,
      "step": 404000
    },
    {
      "epoch": 9.4,
      "learning_rate": 1.8041177154821448e-05,
      "loss": 0.6012,
      "step": 406000
    },
    {
      "epoch": 9.45,
      "learning_rate": 1.664534139816215e-05,
      "loss": 0.6029,
      "step": 408000
    },
    {
      "epoch": 9.49,
      "learning_rate": 1.5249505641502848e-05,
      "loss": 0.5948,
      "step": 410000
    },
    {
      "epoch": 9.54,
      "learning_rate": 1.3853669884843549e-05,
      "loss": 0.6018,
      "step": 412000
    },
    {
      "epoch": 9.59,
      "learning_rate": 1.2457834128184248e-05,
      "loss": 0.5983,
      "step": 414000
    },
    {
      "epoch": 9.63,
      "learning_rate": 1.1061998371524949e-05,
      "loss": 0.6046,
      "step": 416000
    },
    {
      "epoch": 9.68,
      "learning_rate": 9.666162614865652e-06,
      "loss": 0.5971,
      "step": 418000
    },
    {
      "epoch": 9.73,
      "learning_rate": 8.27032685820635e-06,
      "loss": 0.6002,
      "step": 420000
    },
    {
      "epoch": 9.77,
      "learning_rate": 6.87449110154705e-06,
      "loss": 0.598,
      "step": 422000
    },
    {
      "epoch": 9.82,
      "learning_rate": 5.478655344887752e-06,
      "loss": 0.6042,
      "step": 424000
    },
    {
      "epoch": 9.86,
      "learning_rate": 4.082819588228451e-06,
      "loss": 0.5917,
      "step": 426000
    },
    {
      "epoch": 9.91,
      "learning_rate": 2.686983831569152e-06,
      "loss": 0.5985,
      "step": 428000
    },
    {
      "epoch": 9.96,
      "learning_rate": 1.2911480749098522e-06,
      "loss": 0.6036,
      "step": 430000
    },
    {
      "epoch": 10.0,
      "step": 431850,
      "total_flos": 8.831009191456261e+20,
      "train_loss": 0.806209775733219,
      "train_runtime": 212917.0057,
      "train_samples_per_second": 32.452,
      "train_steps_per_second": 2.028
    }
  ],
  "logging_steps": 2000,
  "max_steps": 431850,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 8.831009191456261e+20,
  "trial_name": null,
  "trial_params": null
}