|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.999701550445437, |
|
"eval_steps": 500, |
|
"global_step": 670120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001342882721575649, |
|
"loss": 12.1916, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002685765443151298, |
|
"loss": 2.3149, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00029896083615458125, |
|
"loss": 1.9954, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00029760422539032704, |
|
"loss": 1.8429, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002962476146260728, |
|
"loss": 1.7657, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002948910038618186, |
|
"loss": 1.6944, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002935343930975644, |
|
"loss": 1.6484, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002921777823333102, |
|
"loss": 1.6187, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00029082117156905597, |
|
"loss": 1.5982, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00028946456080480176, |
|
"loss": 1.5673, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00028810795004054755, |
|
"loss": 1.5474, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002867513392762934, |
|
"loss": 1.4948, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0002853947285120391, |
|
"loss": 1.4913, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00028403811774778496, |
|
"loss": 1.4821, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002826815069835307, |
|
"loss": 1.4474, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00028132489621927653, |
|
"loss": 1.4602, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00027996828545502226, |
|
"loss": 1.456, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0002786116746907681, |
|
"loss": 1.4377, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002772550639265139, |
|
"loss": 1.4301, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0002758984531622597, |
|
"loss": 1.425, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00027454184239800546, |
|
"loss": 1.4153, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00027318523163375125, |
|
"loss": 1.4046, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00027182862086949704, |
|
"loss": 1.3565, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0002704720101052428, |
|
"loss": 1.3488, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0002691153993409886, |
|
"loss": 1.339, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002677587885767344, |
|
"loss": 1.3513, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0002664021778124802, |
|
"loss": 1.3491, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00026504556704822597, |
|
"loss": 1.3345, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00026368895628397175, |
|
"loss": 1.3313, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00026233234551971754, |
|
"loss": 1.3319, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00026097573475546333, |
|
"loss": 1.3185, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00025961912399120917, |
|
"loss": 1.325, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.0002582625132269549, |
|
"loss": 1.3048, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00025690590246270074, |
|
"loss": 1.2797, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00025554929169844653, |
|
"loss": 1.2646, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002541926809341923, |
|
"loss": 1.2595, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0002528360701699381, |
|
"loss": 1.2574, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0002514794594056839, |
|
"loss": 1.2623, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0002501228486414297, |
|
"loss": 1.2548, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00024876623787717546, |
|
"loss": 1.2615, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00024740962711292125, |
|
"loss": 1.2558, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.00024605301634866703, |
|
"loss": 1.2537, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0002446964055844128, |
|
"loss": 1.2393, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0002433397948201586, |
|
"loss": 1.2438, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00024198318405590442, |
|
"loss": 1.2277, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00024062657329165018, |
|
"loss": 1.1934, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.000239269962527396, |
|
"loss": 1.1857, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00023791335176314175, |
|
"loss": 1.1902, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.00023655674099888756, |
|
"loss": 1.1901, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00023520013023463332, |
|
"loss": 1.1948, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.00023384351947037914, |
|
"loss": 1.1941, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.00023248690870612495, |
|
"loss": 1.1903, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0002311302979418707, |
|
"loss": 1.1877, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.00022977368717761652, |
|
"loss": 1.1974, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.00022841707641336228, |
|
"loss": 1.189, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.0002270604656491081, |
|
"loss": 1.1851, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00022570385488485386, |
|
"loss": 1.1479, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00022434724412059967, |
|
"loss": 1.1374, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00022299063335634543, |
|
"loss": 1.1343, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.00022163402259209124, |
|
"loss": 1.1306, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.00022027741182783706, |
|
"loss": 1.1399, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00021892080106358282, |
|
"loss": 1.1457, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.00021756419029932863, |
|
"loss": 1.1469, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.0002162075795350744, |
|
"loss": 1.1448, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0002148509687708202, |
|
"loss": 1.1397, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.00021349435800656596, |
|
"loss": 1.1441, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00021213774724231177, |
|
"loss": 1.1453, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00021078113647805753, |
|
"loss": 1.0897, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00020942452571380335, |
|
"loss": 1.0956, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0002080679149495491, |
|
"loss": 1.0947, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00020671130418529492, |
|
"loss": 1.0961, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00020535469342104073, |
|
"loss": 1.1117, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0002039980826567865, |
|
"loss": 1.1032, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.0002026414718925323, |
|
"loss": 1.0983, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.00020128486112827807, |
|
"loss": 1.0885, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00019992825036402388, |
|
"loss": 1.0867, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.00019857163959976964, |
|
"loss": 1.0993, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00019721502883551545, |
|
"loss": 1.1021, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00019585841807126124, |
|
"loss": 1.0519, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00019450180730700702, |
|
"loss": 1.0594, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00019314519654275284, |
|
"loss": 1.0555, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.0001917885857784986, |
|
"loss": 1.057, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0001904319750142444, |
|
"loss": 1.0585, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00018907536424999017, |
|
"loss": 1.0534, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.00018771875348573598, |
|
"loss": 1.0655, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.00018636214272148174, |
|
"loss": 1.056, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.00018500553195722756, |
|
"loss": 1.0638, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.00018364892119297334, |
|
"loss": 1.0521, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.00018229231042871913, |
|
"loss": 1.0633, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.00018093569966446492, |
|
"loss": 1.0345, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.0001795790889002107, |
|
"loss": 1.0186, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.00017822247813595652, |
|
"loss": 1.0141, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00017686586737170228, |
|
"loss": 1.0184, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 0.0001755092566074481, |
|
"loss": 1.0184, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 0.00017415264584319385, |
|
"loss": 1.0222, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.00017279603507893966, |
|
"loss": 1.0176, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 0.00017143942431468545, |
|
"loss": 1.0236, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.00017008281355043123, |
|
"loss": 1.0278, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.00016872620278617702, |
|
"loss": 1.0076, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.0001673695920219228, |
|
"loss": 1.0248, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.00016601298125766862, |
|
"loss": 0.9915, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.00016465637049341438, |
|
"loss": 0.9759, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.0001632997597291602, |
|
"loss": 0.9813, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.00016194314896490595, |
|
"loss": 0.9853, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.00016058653820065177, |
|
"loss": 0.9808, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.00015922992743639755, |
|
"loss": 0.9759, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.00015787331667214334, |
|
"loss": 0.9852, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.00015651670590788913, |
|
"loss": 0.9796, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.0001551600951436349, |
|
"loss": 0.9871, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.0001538034843793807, |
|
"loss": 0.9953, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.00015244687361512649, |
|
"loss": 0.9883, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.0001510902628508723, |
|
"loss": 0.9735, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 0.00014973365208661809, |
|
"loss": 0.9509, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.00014837704132236387, |
|
"loss": 0.9448, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 0.00014702043055810966, |
|
"loss": 0.9395, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 0.00014566381979385544, |
|
"loss": 0.9438, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.00014430720902960123, |
|
"loss": 0.9498, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.00014295059826534702, |
|
"loss": 0.9481, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0001415939875010928, |
|
"loss": 0.9509, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.0001402373767368386, |
|
"loss": 0.9527, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0001388807659725844, |
|
"loss": 0.944, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 0.0001375241552083302, |
|
"loss": 0.9427, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 0.00013616754444407598, |
|
"loss": 0.9511, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.00013481093367982176, |
|
"loss": 0.901, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.00013345432291556755, |
|
"loss": 0.9175, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 0.00013209771215131334, |
|
"loss": 0.9061, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 0.00013074110138705912, |
|
"loss": 0.9175, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 0.0001293844906228049, |
|
"loss": 0.9175, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 0.0001280278798585507, |
|
"loss": 0.9149, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 0.0001266712690942965, |
|
"loss": 0.9155, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 0.0001253146583300423, |
|
"loss": 0.9129, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 0.00012395804756578808, |
|
"loss": 0.9178, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 0.00012260143680153387, |
|
"loss": 0.912, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00012124482603727964, |
|
"loss": 0.9217, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 0.00011988821527302545, |
|
"loss": 0.8778, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 0.00011853160450877124, |
|
"loss": 0.8741, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.00011717499374451703, |
|
"loss": 0.8786, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 0.00011581838298026281, |
|
"loss": 0.8837, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.0001144617722160086, |
|
"loss": 0.883, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 0.00011310516145175439, |
|
"loss": 0.8764, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00011174855068750017, |
|
"loss": 0.8881, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.00011039193992324596, |
|
"loss": 0.8844, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.00010903532915899175, |
|
"loss": 0.8838, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 0.00010767871839473755, |
|
"loss": 0.8799, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 0.00010632210763048335, |
|
"loss": 0.8766, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 0.00010496549686622913, |
|
"loss": 0.8562, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 0.00010360888610197492, |
|
"loss": 0.8445, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 0.0001022522753377207, |
|
"loss": 0.8422, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 0.00010089566457346649, |
|
"loss": 0.8405, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 9.953905380921228e-05, |
|
"loss": 0.8456, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 9.818244304495806e-05, |
|
"loss": 0.8516, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 9.682583228070386e-05, |
|
"loss": 0.8514, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 9.546922151644965e-05, |
|
"loss": 0.8465, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 9.411261075219544e-05, |
|
"loss": 0.8499, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 9.275599998794124e-05, |
|
"loss": 0.8582, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 9.139938922368702e-05, |
|
"loss": 0.8544, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 9.004277845943281e-05, |
|
"loss": 0.8226, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 8.86861676951786e-05, |
|
"loss": 0.8132, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 8.732955693092438e-05, |
|
"loss": 0.8196, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 8.597294616667018e-05, |
|
"loss": 0.8221, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 8.461633540241597e-05, |
|
"loss": 0.8155, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 8.325972463816176e-05, |
|
"loss": 0.8219, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 8.190311387390754e-05, |
|
"loss": 0.8171, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 8.054650310965333e-05, |
|
"loss": 0.8116, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 7.918989234539913e-05, |
|
"loss": 0.8213, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 7.783328158114491e-05, |
|
"loss": 0.8154, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 7.64766708168907e-05, |
|
"loss": 0.824, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 7.512006005263649e-05, |
|
"loss": 0.8068, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 7.376344928838229e-05, |
|
"loss": 0.7813, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 7.240683852412807e-05, |
|
"loss": 0.7947, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 7.105022775987386e-05, |
|
"loss": 0.7899, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 6.969361699561965e-05, |
|
"loss": 0.7885, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 6.833700623136545e-05, |
|
"loss": 0.7963, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 6.698039546711123e-05, |
|
"loss": 0.787, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 6.562378470285702e-05, |
|
"loss": 0.7877, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 6.42671739386028e-05, |
|
"loss": 0.7949, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 6.291056317434859e-05, |
|
"loss": 0.7835, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 6.155395241009439e-05, |
|
"loss": 0.7904, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 6.019734164584017e-05, |
|
"loss": 0.7797, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 5.8840730881585965e-05, |
|
"loss": 0.7606, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 5.748412011733175e-05, |
|
"loss": 0.7671, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 5.6127509353077545e-05, |
|
"loss": 0.764, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 5.477089858882333e-05, |
|
"loss": 0.758, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 5.3414287824569124e-05, |
|
"loss": 0.7518, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 16.56, |
|
"learning_rate": 5.205767706031491e-05, |
|
"loss": 0.7644, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 5.07010662960607e-05, |
|
"loss": 0.7577, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 4.934445553180649e-05, |
|
"loss": 0.762, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 4.7987844767552283e-05, |
|
"loss": 0.7548, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"learning_rate": 4.663123400329807e-05, |
|
"loss": 0.7567, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 4.5274623239043856e-05, |
|
"loss": 0.7613, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 4.391801247478964e-05, |
|
"loss": 0.7346, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 4.256140171053544e-05, |
|
"loss": 0.7323, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 4.120479094628123e-05, |
|
"loss": 0.7322, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 3.9848180182027016e-05, |
|
"loss": 0.7456, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 17.46, |
|
"learning_rate": 3.84915694177728e-05, |
|
"loss": 0.7324, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 3.7134958653518595e-05, |
|
"loss": 0.7414, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"learning_rate": 3.577834788926438e-05, |
|
"loss": 0.7334, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 3.442173712501017e-05, |
|
"loss": 0.731, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 3.306512636075596e-05, |
|
"loss": 0.7488, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 3.170851559650175e-05, |
|
"loss": 0.7287, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.035190483224754e-05, |
|
"loss": 0.7361, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 2.8995294067993327e-05, |
|
"loss": 0.7173, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 2.763868330373912e-05, |
|
"loss": 0.7173, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 2.6282072539484907e-05, |
|
"loss": 0.7138, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"learning_rate": 2.4925461775230697e-05, |
|
"loss": 0.71, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 2.3568851010976486e-05, |
|
"loss": 0.7128, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.2212240246722276e-05, |
|
"loss": 0.7168, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 2.0855629482468066e-05, |
|
"loss": 0.7184, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 1.9499018718213856e-05, |
|
"loss": 0.7099, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 1.8142407953959646e-05, |
|
"loss": 0.7047, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 1.6785797189705435e-05, |
|
"loss": 0.7131, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"learning_rate": 1.5429186425451222e-05, |
|
"loss": 0.7151, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 1.4072575661197012e-05, |
|
"loss": 0.7058, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"learning_rate": 1.2715964896942801e-05, |
|
"loss": 0.6982, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 19.25, |
|
"learning_rate": 1.1359354132688591e-05, |
|
"loss": 0.6983, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 1.0002743368434381e-05, |
|
"loss": 0.6932, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 8.64613260418017e-06, |
|
"loss": 0.7025, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 7.289521839925958e-06, |
|
"loss": 0.6945, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 5.932911075671748e-06, |
|
"loss": 0.7039, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 4.576300311417537e-06, |
|
"loss": 0.6921, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 3.2196895471633263e-06, |
|
"loss": 0.6914, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 1.863078782909116e-06, |
|
"loss": 0.6969, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 19.97, |
|
"learning_rate": 5.064680186549053e-07, |
|
"loss": 0.6948, |
|
"step": 669000 |
|
} |
|
], |
|
"logging_steps": 3000, |
|
"max_steps": 670120, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.67362005330918e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|