{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "eval_steps": 500,
  "global_step": 1181220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 7.61872513332769e-05,
      "loss": 11.8677,
      "step": 3000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0001523745026665538,
      "loss": 1.9864,
      "step": 6000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00022856175399983065,
      "loss": 1.5124,
      "step": 9000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002999520269675143,
      "loss": 1.4304,
      "step": 12000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00029918240612549776,
      "loss": 1.3803,
      "step": 15000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00029841278528348127,
      "loss": 1.3166,
      "step": 18000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0002976431644414648,
      "loss": 1.2798,
      "step": 21000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00029687354359944824,
      "loss": 1.2517,
      "step": 24000
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.0002961039227574317,
      "loss": 1.2085,
      "step": 27000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0002953343019154152,
      "loss": 1.197,
      "step": 30000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0002945646810733987,
      "loss": 1.1841,
      "step": 33000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0002937950602313822,
      "loss": 1.1658,
      "step": 36000
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.0002930254393893657,
      "loss": 1.1471,
      "step": 39000
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0002922558185473492,
      "loss": 1.0998,
      "step": 42000
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.0002914861977053327,
      "loss": 1.0865,
      "step": 45000
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.0002907165768633162,
      "loss": 1.0697,
      "step": 48000
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.00028994695602129963,
      "loss": 1.0537,
      "step": 51000
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.00028917733517928315,
      "loss": 1.0509,
      "step": 54000
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.00028840771433726666,
      "loss": 1.039,
      "step": 57000
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.0002876380934952501,
      "loss": 1.0224,
      "step": 60000
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.00028686847265323363,
      "loss": 1.0209,
      "step": 63000
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.00028609885181121714,
      "loss": 1.0103,
      "step": 66000
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002853292309692006,
      "loss": 1.0058,
      "step": 69000
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00028455961012718406,
      "loss": 0.9954,
      "step": 72000
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.00028378998928516757,
      "loss": 0.9815,
      "step": 75000
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0002830203684431511,
      "loss": 0.9832,
      "step": 78000
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.00028225074760113454,
      "loss": 0.9556,
      "step": 81000
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.00028148112675911805,
      "loss": 0.9396,
      "step": 84000
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.00028071150591710156,
      "loss": 0.9337,
      "step": 87000
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.000279941885075085,
      "loss": 0.9211,
      "step": 90000
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.00027917226423306853,
      "loss": 0.9296,
      "step": 93000
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.00027840264339105205,
      "loss": 0.9216,
      "step": 96000
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0002776330225490355,
      "loss": 0.9132,
      "step": 99000
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.000276863401707019,
      "loss": 0.9041,
      "step": 102000
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.0002760937808650025,
      "loss": 0.9056,
      "step": 105000
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.000275324160022986,
      "loss": 0.9025,
      "step": 108000
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.0002745545391809695,
      "loss": 0.9021,
      "step": 111000
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.00027378491833895296,
      "loss": 0.8958,
      "step": 114000
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.00027301529749693647,
      "loss": 0.888,
      "step": 117000
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.00027224567665492,
      "loss": 0.8677,
      "step": 120000
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.00027147605581290344,
      "loss": 0.8561,
      "step": 123000
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0002707064349708869,
      "loss": 0.8507,
      "step": 126000
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.0002699368141288704,
      "loss": 0.8409,
      "step": 129000
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.0002691671932868539,
      "loss": 0.8394,
      "step": 132000
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.0002683975724448374,
      "loss": 0.8426,
      "step": 135000
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.0002676279516028209,
      "loss": 0.8401,
      "step": 138000
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.0002668583307608044,
      "loss": 0.8449,
      "step": 141000
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.00026608870991878786,
      "loss": 0.838,
      "step": 144000
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.0002653190890767714,
      "loss": 0.834,
      "step": 147000
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.00026454946823475483,
      "loss": 0.8239,
      "step": 150000
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.00026377984739273835,
      "loss": 0.8228,
      "step": 153000
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.00026301022655072186,
      "loss": 0.8277,
      "step": 156000
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.0002622406057087053,
      "loss": 0.8003,
      "step": 159000
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.00026147098486668883,
      "loss": 0.7925,
      "step": 162000
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.00026070136402467234,
      "loss": 0.7911,
      "step": 165000
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.0002599317431826558,
      "loss": 0.7944,
      "step": 168000
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.00025916212234063926,
      "loss": 0.795,
      "step": 171000
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.00025839250149862277,
      "loss": 0.7868,
      "step": 174000
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.0002576228806566063,
      "loss": 0.7821,
      "step": 177000
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.00025685325981458974,
      "loss": 0.7859,
      "step": 180000
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.00025608363897257325,
      "loss": 0.7765,
      "step": 183000
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.00025531401813055676,
      "loss": 0.775,
      "step": 186000
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.0002545443972885402,
      "loss": 0.7768,
      "step": 189000
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.00025377477644652373,
      "loss": 0.7757,
      "step": 192000
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.0002530051556045072,
      "loss": 0.7701,
      "step": 195000
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.0002522355347624907,
      "loss": 0.7557,
      "step": 198000
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.0002514659139204742,
      "loss": 0.7312,
      "step": 201000
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.0002506962930784577,
      "loss": 0.7372,
      "step": 204000
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.0002499266722364412,
      "loss": 0.7383,
      "step": 207000
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.0002491570513944247,
      "loss": 0.7398,
      "step": 210000
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.00024838743055240816,
      "loss": 0.7229,
      "step": 213000
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.00024761780971039167,
      "loss": 0.733,
      "step": 216000
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.00024684818886837513,
      "loss": 0.7318,
      "step": 219000
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.00024607856802635864,
      "loss": 0.7243,
      "step": 222000
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.0002453089471843421,
      "loss": 0.73,
      "step": 225000
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.0002445393263423256,
      "loss": 0.7289,
      "step": 228000
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.00024376970550030912,
      "loss": 0.7331,
      "step": 231000
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.0002430000846582926,
      "loss": 0.7203,
      "step": 234000
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.0002422304638162761,
      "loss": 0.7235,
      "step": 237000
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.0002414608429742596,
      "loss": 0.6931,
      "step": 240000
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.0002406912221322431,
      "loss": 0.6933,
      "step": 243000
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.00023992160129022655,
      "loss": 0.697,
      "step": 246000
    },
    {
      "epoch": 6.32,
      "learning_rate": 0.00023915198044821003,
      "loss": 0.6926,
      "step": 249000
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.00023838235960619355,
      "loss": 0.6953,
      "step": 252000
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.00023761273876417703,
      "loss": 0.6934,
      "step": 255000
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.00023684311792216054,
      "loss": 0.6926,
      "step": 258000
    },
    {
      "epoch": 6.63,
      "learning_rate": 0.00023607349708014403,
      "loss": 0.6969,
      "step": 261000
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.00023530387623812751,
      "loss": 0.6866,
      "step": 264000
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.00023453425539611103,
      "loss": 0.6929,
      "step": 267000
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.00023376463455409448,
      "loss": 0.6914,
      "step": 270000
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.00023299501371207797,
      "loss": 0.6866,
      "step": 273000
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.00023222539287006145,
      "loss": 0.6806,
      "step": 276000
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.00023145577202804497,
      "loss": 0.6561,
      "step": 279000
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.00023068615118602845,
      "loss": 0.6596,
      "step": 282000
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.00022991653034401196,
      "loss": 0.6621,
      "step": 285000
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.00022914690950199545,
      "loss": 0.6644,
      "step": 288000
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.00022837728865997893,
      "loss": 0.6642,
      "step": 291000
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.0002276076678179624,
      "loss": 0.6621,
      "step": 294000
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.0002268380469759459,
      "loss": 0.6596,
      "step": 297000
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.0002260684261339294,
      "loss": 0.6616,
      "step": 300000
    },
    {
      "epoch": 7.7,
      "learning_rate": 0.00022529880529191288,
      "loss": 0.666,
      "step": 303000
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.0002245291844498964,
      "loss": 0.6645,
      "step": 306000
    },
    {
      "epoch": 7.85,
      "learning_rate": 0.00022375956360787987,
      "loss": 0.6616,
      "step": 309000
    },
    {
      "epoch": 7.92,
      "learning_rate": 0.00022298994276586339,
      "loss": 0.6593,
      "step": 312000
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.00022222032192384687,
      "loss": 0.6622,
      "step": 315000
    },
    {
      "epoch": 8.08,
      "learning_rate": 0.00022145070108183033,
      "loss": 0.6294,
      "step": 318000
    },
    {
      "epoch": 8.15,
      "learning_rate": 0.0002206810802398138,
      "loss": 0.6308,
      "step": 321000
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.00021991145939779733,
      "loss": 0.6335,
      "step": 324000
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.0002191418385557808,
      "loss": 0.629,
      "step": 327000
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.0002183722177137643,
      "loss": 0.6354,
      "step": 330000
    },
    {
      "epoch": 8.46,
      "learning_rate": 0.0002176025968717478,
      "loss": 0.6303,
      "step": 333000
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.0002168329760297313,
      "loss": 0.6338,
      "step": 336000
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.0002160633551877148,
      "loss": 0.6317,
      "step": 339000
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.00021529373434569826,
      "loss": 0.6339,
      "step": 342000
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.00021452411350368175,
      "loss": 0.6344,
      "step": 345000
    },
    {
      "epoch": 8.84,
      "learning_rate": 0.00021375449266166523,
      "loss": 0.631,
      "step": 348000
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.00021298487181964875,
      "loss": 0.6273,
      "step": 351000
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.00021221525097763223,
      "loss": 0.6276,
      "step": 354000
    },
    {
      "epoch": 9.07,
      "learning_rate": 0.00021144563013561572,
      "loss": 0.6045,
      "step": 357000
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.00021067600929359923,
      "loss": 0.6012,
      "step": 360000
    },
    {
      "epoch": 9.22,
      "learning_rate": 0.00020990638845158271,
      "loss": 0.599,
      "step": 363000
    },
    {
      "epoch": 9.3,
      "learning_rate": 0.00020913676760956623,
      "loss": 0.6073,
      "step": 366000
    },
    {
      "epoch": 9.37,
      "learning_rate": 0.00020836714676754968,
      "loss": 0.6053,
      "step": 369000
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.00020759752592553317,
      "loss": 0.6032,
      "step": 372000
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.00020682790508351666,
      "loss": 0.6035,
      "step": 375000
    },
    {
      "epoch": 9.6,
      "learning_rate": 0.00020605828424150017,
      "loss": 0.6058,
      "step": 378000
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.00020528866339948365,
      "loss": 0.6045,
      "step": 381000
    },
    {
      "epoch": 9.75,
      "learning_rate": 0.00020451904255746714,
      "loss": 0.6034,
      "step": 384000
    },
    {
      "epoch": 9.83,
      "learning_rate": 0.00020374942171545065,
      "loss": 0.6016,
      "step": 387000
    },
    {
      "epoch": 9.91,
      "learning_rate": 0.00020297980087343414,
      "loss": 0.599,
      "step": 390000
    },
    {
      "epoch": 9.98,
      "learning_rate": 0.0002022101800314176,
      "loss": 0.6076,
      "step": 393000
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.0002014405591894011,
      "loss": 0.5825,
      "step": 396000
    },
    {
      "epoch": 10.13,
      "learning_rate": 0.0002006709383473846,
      "loss": 0.5748,
      "step": 399000
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.00019990131750536808,
      "loss": 0.5793,
      "step": 402000
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.0001991316966633516,
      "loss": 0.5767,
      "step": 405000
    },
    {
      "epoch": 10.36,
      "learning_rate": 0.00019836207582133507,
      "loss": 0.5839,
      "step": 408000
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.00019759245497931856,
      "loss": 0.5834,
      "step": 411000
    },
    {
      "epoch": 10.51,
      "learning_rate": 0.00019682283413730207,
      "loss": 0.5824,
      "step": 414000
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.00019605321329528553,
      "loss": 0.5817,
      "step": 417000
    },
    {
      "epoch": 10.67,
      "learning_rate": 0.00019528359245326901,
      "loss": 0.5785,
      "step": 420000
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.00019451397161125253,
      "loss": 0.5777,
      "step": 423000
    },
    {
      "epoch": 10.82,
      "learning_rate": 0.000193744350769236,
      "loss": 0.5787,
      "step": 426000
    },
    {
      "epoch": 10.9,
      "learning_rate": 0.0001929747299272195,
      "loss": 0.5803,
      "step": 429000
    },
    {
      "epoch": 10.97,
      "learning_rate": 0.000192205109085203,
      "loss": 0.5772,
      "step": 432000
    },
    {
      "epoch": 11.05,
      "learning_rate": 0.0001914354882431865,
      "loss": 0.5645,
      "step": 435000
    },
    {
      "epoch": 11.12,
      "learning_rate": 0.00019066586740116998,
      "loss": 0.5497,
      "step": 438000
    },
    {
      "epoch": 11.2,
      "learning_rate": 0.00018989624655915346,
      "loss": 0.5556,
      "step": 441000
    },
    {
      "epoch": 11.28,
      "learning_rate": 0.00018912662571713695,
      "loss": 0.5562,
      "step": 444000
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.00018835700487512043,
      "loss": 0.5523,
      "step": 447000
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.00018758738403310395,
      "loss": 0.5595,
      "step": 450000
    },
    {
      "epoch": 11.51,
      "learning_rate": 0.00018681776319108743,
      "loss": 0.5638,
      "step": 453000
    },
    {
      "epoch": 11.58,
      "learning_rate": 0.00018604814234907092,
      "loss": 0.5593,
      "step": 456000
    },
    {
      "epoch": 11.66,
      "learning_rate": 0.00018527852150705443,
      "loss": 0.5588,
      "step": 459000
    },
    {
      "epoch": 11.73,
      "learning_rate": 0.00018450890066503791,
      "loss": 0.5549,
      "step": 462000
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.00018373927982302137,
      "loss": 0.5659,
      "step": 465000
    },
    {
      "epoch": 11.89,
      "learning_rate": 0.00018296965898100489,
      "loss": 0.5567,
      "step": 468000
    },
    {
      "epoch": 11.96,
      "learning_rate": 0.00018220003813898837,
      "loss": 0.5594,
      "step": 471000
    },
    {
      "epoch": 12.04,
      "learning_rate": 0.00018143041729697186,
      "loss": 0.5468,
      "step": 474000
    },
    {
      "epoch": 12.11,
      "learning_rate": 0.00018066079645495537,
      "loss": 0.5331,
      "step": 477000
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.00017989117561293885,
      "loss": 0.5387,
      "step": 480000
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.00017912155477092234,
      "loss": 0.54,
      "step": 483000
    },
    {
      "epoch": 12.34,
      "learning_rate": 0.00017835193392890585,
      "loss": 0.5396,
      "step": 486000
    },
    {
      "epoch": 12.42,
      "learning_rate": 0.00017758231308688934,
      "loss": 0.5388,
      "step": 489000
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.0001768126922448728,
      "loss": 0.5422,
      "step": 492000
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.0001760430714028563,
      "loss": 0.5353,
      "step": 495000
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.0001752734505608398,
      "loss": 0.536,
      "step": 498000
    },
    {
      "epoch": 12.72,
      "learning_rate": 0.00017450382971882328,
      "loss": 0.5418,
      "step": 501000
    },
    {
      "epoch": 12.8,
      "learning_rate": 0.0001737342088768068,
      "loss": 0.5449,
      "step": 504000
    },
    {
      "epoch": 12.88,
      "learning_rate": 0.00017296458803479027,
      "loss": 0.5385,
      "step": 507000
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.00017219496719277376,
      "loss": 0.5465,
      "step": 510000
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.00017142534635075727,
      "loss": 0.5263,
      "step": 513000
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.00017065572550874073,
      "loss": 0.5144,
      "step": 516000
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.00016988610466672421,
      "loss": 0.5184,
      "step": 519000
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.00016911648382470773,
      "loss": 0.5191,
      "step": 522000
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.0001683468629826912,
      "loss": 0.5202,
      "step": 525000
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.0001675772421406747,
      "loss": 0.5225,
      "step": 528000
    },
    {
      "epoch": 13.49,
      "learning_rate": 0.0001668076212986582,
      "loss": 0.5247,
      "step": 531000
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.0001660380004566417,
      "loss": 0.521,
      "step": 534000
    },
    {
      "epoch": 13.64,
      "learning_rate": 0.00016526837961462518,
      "loss": 0.5251,
      "step": 537000
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.00016449875877260866,
      "loss": 0.5247,
      "step": 540000
    },
    {
      "epoch": 13.79,
      "learning_rate": 0.00016372913793059215,
      "loss": 0.5219,
      "step": 543000
    },
    {
      "epoch": 13.87,
      "learning_rate": 0.00016295951708857564,
      "loss": 0.522,
      "step": 546000
    },
    {
      "epoch": 13.94,
      "learning_rate": 0.00016218989624655915,
      "loss": 0.5207,
      "step": 549000
    },
    {
      "epoch": 14.02,
      "learning_rate": 0.00016142027540454263,
      "loss": 0.5167,
      "step": 552000
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.00016065065456252612,
      "loss": 0.5018,
      "step": 555000
    },
    {
      "epoch": 14.17,
      "learning_rate": 0.00015988103372050963,
      "loss": 0.5001,
      "step": 558000
    },
    {
      "epoch": 14.25,
      "learning_rate": 0.00015911141287849312,
      "loss": 0.4997,
      "step": 561000
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.00015834179203647657,
      "loss": 0.5007,
      "step": 564000
    },
    {
      "epoch": 14.4,
      "learning_rate": 0.00015757217119446009,
      "loss": 0.5028,
      "step": 567000
    },
    {
      "epoch": 14.48,
      "learning_rate": 0.00015680255035244357,
      "loss": 0.5004,
      "step": 570000
    },
    {
      "epoch": 14.55,
      "learning_rate": 0.00015603292951042706,
      "loss": 0.5028,
      "step": 573000
    },
    {
      "epoch": 14.63,
      "learning_rate": 0.00015526330866841057,
      "loss": 0.5053,
      "step": 576000
    },
    {
      "epoch": 14.71,
      "learning_rate": 0.00015449368782639405,
      "loss": 0.5058,
      "step": 579000
    },
    {
      "epoch": 14.78,
      "learning_rate": 0.00015372406698437754,
      "loss": 0.5037,
      "step": 582000
    },
    {
      "epoch": 14.86,
      "learning_rate": 0.00015295444614236105,
      "loss": 0.5051,
      "step": 585000
    },
    {
      "epoch": 14.93,
      "learning_rate": 0.0001521848253003445,
      "loss": 0.5041,
      "step": 588000
    },
    {
      "epoch": 15.01,
      "learning_rate": 0.000151415204458328,
      "loss": 0.5031,
      "step": 591000
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.0001506455836163115,
      "loss": 0.4792,
      "step": 594000
    },
    {
      "epoch": 15.16,
      "learning_rate": 0.000149875962774295,
      "loss": 0.4834,
      "step": 597000
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.00014910634193227848,
      "loss": 0.4833,
      "step": 600000
    },
    {
      "epoch": 15.31,
      "learning_rate": 0.000148336721090262,
      "loss": 0.4821,
      "step": 603000
    },
    {
      "epoch": 15.39,
      "learning_rate": 0.00014756710024824547,
      "loss": 0.4869,
      "step": 606000
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.00014679747940622896,
      "loss": 0.4823,
      "step": 609000
    },
    {
      "epoch": 15.54,
      "learning_rate": 0.00014602785856421244,
      "loss": 0.486,
      "step": 612000
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.00014525823772219593,
      "loss": 0.4847,
      "step": 615000
    },
    {
      "epoch": 15.7,
      "learning_rate": 0.00014448861688017944,
      "loss": 0.4897,
      "step": 618000
    },
    {
      "epoch": 15.77,
      "learning_rate": 0.00014371899603816293,
      "loss": 0.4886,
      "step": 621000
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.0001429493751961464,
      "loss": 0.491,
      "step": 624000
    },
    {
      "epoch": 15.92,
      "learning_rate": 0.0001421797543541299,
      "loss": 0.488,
      "step": 627000
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.0001414101335121134,
      "loss": 0.4881,
      "step": 630000
    },
    {
      "epoch": 16.08,
      "learning_rate": 0.00014064051267009687,
      "loss": 0.4638,
      "step": 633000
    },
    {
      "epoch": 16.15,
      "learning_rate": 0.00013987089182808038,
      "loss": 0.4701,
      "step": 636000
    },
    {
      "epoch": 16.23,
      "learning_rate": 0.00013910127098606387,
      "loss": 0.4688,
      "step": 639000
    },
    {
      "epoch": 16.31,
      "learning_rate": 0.00013833165014404735,
      "loss": 0.4663,
      "step": 642000
    },
    {
      "epoch": 16.38,
      "learning_rate": 0.00013756202930203084,
      "loss": 0.4669,
      "step": 645000
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.00013679240846001435,
      "loss": 0.4658,
      "step": 648000
    },
    {
      "epoch": 16.53,
      "learning_rate": 0.00013602278761799783,
      "loss": 0.463,
      "step": 651000
    },
    {
      "epoch": 16.61,
      "learning_rate": 0.00013525316677598132,
      "loss": 0.4683,
      "step": 654000
    },
    {
      "epoch": 16.69,
      "learning_rate": 0.0001344835459339648,
      "loss": 0.4669,
      "step": 657000
    },
    {
      "epoch": 16.76,
      "learning_rate": 0.0001337139250919483,
      "loss": 0.4691,
      "step": 660000
    },
    {
      "epoch": 16.84,
      "learning_rate": 0.0001329443042499318,
      "loss": 0.4701,
      "step": 663000
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.00013217468340791529,
      "loss": 0.4698,
      "step": 666000
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.00013140506256589877,
      "loss": 0.4738,
      "step": 669000
    },
    {
      "epoch": 17.07,
      "learning_rate": 0.00013063544172388226,
      "loss": 0.4584,
      "step": 672000
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.00012986582088186577,
      "loss": 0.4488,
      "step": 675000
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.00012909620003984925,
      "loss": 0.4494,
      "step": 678000
    },
    {
      "epoch": 17.3,
      "learning_rate": 0.00012832657919783274,
      "loss": 0.4521,
      "step": 681000
    },
    {
      "epoch": 17.37,
      "learning_rate": 0.00012755695835581622,
      "loss": 0.4551,
      "step": 684000
    },
    {
      "epoch": 17.45,
      "learning_rate": 0.0001267873375137997,
      "loss": 0.4511,
      "step": 687000
    },
    {
      "epoch": 17.52,
      "learning_rate": 0.00012601771667178322,
      "loss": 0.4536,
      "step": 690000
    },
    {
      "epoch": 17.6,
      "learning_rate": 0.00012524809582976668,
      "loss": 0.4532,
      "step": 693000
    },
    {
      "epoch": 17.68,
      "learning_rate": 0.0001244784749877502,
      "loss": 0.4541,
      "step": 696000
    },
    {
      "epoch": 17.75,
      "learning_rate": 0.00012370885414573368,
      "loss": 0.4535,
      "step": 699000
    },
    {
      "epoch": 17.83,
      "learning_rate": 0.0001229392333037172,
      "loss": 0.4516,
      "step": 702000
    },
    {
      "epoch": 17.91,
      "learning_rate": 0.00012216961246170065,
      "loss": 0.4519,
      "step": 705000
    },
    {
      "epoch": 17.98,
      "learning_rate": 0.00012139999161968415,
      "loss": 0.4577,
      "step": 708000
    },
    {
      "epoch": 18.06,
      "learning_rate": 0.00012063037077766764,
      "loss": 0.437,
      "step": 711000
    },
    {
      "epoch": 18.13,
      "learning_rate": 0.00011986074993565114,
      "loss": 0.437,
      "step": 714000
    },
    {
      "epoch": 18.21,
      "learning_rate": 0.00011909112909363462,
      "loss": 0.4363,
      "step": 717000
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.00011832150825161811,
      "loss": 0.4365,
      "step": 720000
    },
    {
      "epoch": 18.36,
      "learning_rate": 0.00011755188740960161,
      "loss": 0.4351,
      "step": 723000
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.0001167822665675851,
      "loss": 0.4356,
      "step": 726000
    },
    {
      "epoch": 18.51,
      "learning_rate": 0.0001160126457255686,
      "loss": 0.4404,
      "step": 729000
    },
    {
      "epoch": 18.59,
      "learning_rate": 0.00011524302488355208,
      "loss": 0.4386,
      "step": 732000
    },
    {
      "epoch": 18.67,
      "learning_rate": 0.00011447340404153557,
      "loss": 0.4345,
      "step": 735000
    },
    {
      "epoch": 18.74,
      "learning_rate": 0.00011370378319951907,
      "loss": 0.4362,
      "step": 738000
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.00011293416235750256,
      "loss": 0.4397,
      "step": 741000
    },
    {
      "epoch": 18.9,
      "learning_rate": 0.00011216454151548604,
      "loss": 0.4408,
      "step": 744000
    },
    {
      "epoch": 18.97,
      "learning_rate": 0.00011139492067346953,
      "loss": 0.4406,
      "step": 747000
    },
    {
      "epoch": 19.05,
      "learning_rate": 0.00011062529983145303,
      "loss": 0.4269,
      "step": 750000
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.00010985567898943652,
      "loss": 0.4185,
      "step": 753000
    },
    {
      "epoch": 19.2,
      "learning_rate": 0.00010908605814742,
      "loss": 0.4227,
      "step": 756000
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.0001083164373054035,
      "loss": 0.4208,
      "step": 759000
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.00010754681646338699,
      "loss": 0.4222,
      "step": 762000
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.00010677719562137049,
      "loss": 0.4209,
      "step": 765000
    },
    {
      "epoch": 19.51,
      "learning_rate": 0.00010600757477935397,
      "loss": 0.4235,
      "step": 768000
    },
    {
      "epoch": 19.58,
      "learning_rate": 0.00010523795393733746,
      "loss": 0.4229,
      "step": 771000
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.00010446833309532096,
      "loss": 0.4263,
      "step": 774000
    },
    {
      "epoch": 19.73,
      "learning_rate": 0.00010369871225330445,
      "loss": 0.4247,
      "step": 777000
    },
    {
      "epoch": 19.81,
      "learning_rate": 0.00010292909141128793,
      "loss": 0.4224,
      "step": 780000
    },
    {
      "epoch": 19.89,
      "learning_rate": 0.00010215947056927142,
      "loss": 0.422,
      "step": 783000
    },
    {
      "epoch": 19.96,
      "learning_rate": 0.00010138984972725492,
      "loss": 0.4247,
      "step": 786000
    },
    {
      "epoch": 20.04,
      "learning_rate": 0.00010062022888523841,
      "loss": 0.4111,
      "step": 789000
    },
    {
      "epoch": 20.11,
      "learning_rate": 9.98506080432219e-05,
      "loss": 0.4058,
      "step": 792000
    },
    {
      "epoch": 20.19,
      "learning_rate": 9.908098720120539e-05,
      "loss": 0.4069,
      "step": 795000
    },
    {
      "epoch": 20.27,
      "learning_rate": 9.831136635918888e-05,
      "loss": 0.4046,
      "step": 798000
    },
    {
      "epoch": 20.34,
      "learning_rate": 9.754174551717238e-05,
      "loss": 0.4051,
      "step": 801000
    },
    {
      "epoch": 20.42,
      "learning_rate": 9.677212467515585e-05,
      "loss": 0.4079,
      "step": 804000
    },
    {
      "epoch": 20.5,
      "learning_rate": 9.600250383313935e-05,
      "loss": 0.4045,
      "step": 807000
    },
    {
      "epoch": 20.57,
      "learning_rate": 9.523288299112285e-05,
      "loss": 0.4083,
      "step": 810000
    },
    {
      "epoch": 20.65,
      "learning_rate": 9.446326214910634e-05,
      "loss": 0.408,
      "step": 813000
    },
    {
      "epoch": 20.72,
      "learning_rate": 9.369364130708982e-05,
      "loss": 0.4074,
      "step": 816000
    },
    {
      "epoch": 20.8,
      "learning_rate": 9.292402046507331e-05,
      "loss": 0.41,
      "step": 819000
    },
    {
      "epoch": 20.88,
      "learning_rate": 9.215439962305681e-05,
      "loss": 0.4093,
      "step": 822000
    },
    {
      "epoch": 20.95,
      "learning_rate": 9.13847787810403e-05,
      "loss": 0.411,
      "step": 825000
    },
    {
      "epoch": 21.03,
      "learning_rate": 9.061515793902378e-05,
      "loss": 0.4029,
      "step": 828000
    },
    {
      "epoch": 21.11,
      "learning_rate": 8.984553709700728e-05,
      "loss": 0.3928,
      "step": 831000
    },
    {
      "epoch": 21.18,
      "learning_rate": 8.907591625499077e-05,
      "loss": 0.3953,
      "step": 834000
    },
    {
      "epoch": 21.26,
      "learning_rate": 8.830629541297427e-05,
      "loss": 0.3899,
      "step": 837000
    },
    {
      "epoch": 21.33,
      "learning_rate": 8.753667457095774e-05,
      "loss": 0.3938,
      "step": 840000
    },
    {
      "epoch": 21.41,
      "learning_rate": 8.676705372894124e-05,
      "loss": 0.3958,
      "step": 843000
    },
    {
      "epoch": 21.49,
      "learning_rate": 8.599743288692473e-05,
      "loss": 0.3952,
      "step": 846000
    },
    {
      "epoch": 21.56,
      "learning_rate": 8.522781204490823e-05,
      "loss": 0.3959,
      "step": 849000
    },
    {
      "epoch": 21.64,
      "learning_rate": 8.445819120289172e-05,
      "loss": 0.3989,
      "step": 852000
    },
    {
      "epoch": 21.71,
      "learning_rate": 8.36885703608752e-05,
      "loss": 0.3976,
      "step": 855000
    },
    {
      "epoch": 21.79,
      "learning_rate": 8.29189495188587e-05,
      "loss": 0.3935,
      "step": 858000
    },
    {
      "epoch": 21.87,
      "learning_rate": 8.214932867684219e-05,
      "loss": 0.3951,
      "step": 861000
    },
    {
      "epoch": 21.94,
      "learning_rate": 8.137970783482569e-05,
      "loss": 0.3976,
      "step": 864000
    },
    {
      "epoch": 22.02,
      "learning_rate": 8.061008699280916e-05,
      "loss": 0.388,
      "step": 867000
    },
    {
      "epoch": 22.1,
      "learning_rate": 7.984046615079266e-05,
      "loss": 0.3745,
      "step": 870000
    },
    {
      "epoch": 22.17,
      "learning_rate": 7.907084530877616e-05,
      "loss": 0.382,
      "step": 873000
    },
    {
      "epoch": 22.25,
      "learning_rate": 7.830122446675965e-05,
      "loss": 0.3833,
      "step": 876000
    },
    {
      "epoch": 22.32,
      "learning_rate": 7.753160362474313e-05,
      "loss": 0.3837,
      "step": 879000
    },
    {
      "epoch": 22.4,
      "learning_rate": 7.676198278272662e-05,
      "loss": 0.3792,
      "step": 882000
    },
    {
      "epoch": 22.48,
      "learning_rate": 7.599236194071012e-05,
      "loss": 0.3826,
      "step": 885000
    },
    {
      "epoch": 22.55,
      "learning_rate": 7.522274109869361e-05,
      "loss": 0.3802,
      "step": 888000
    },
    {
      "epoch": 22.63,
      "learning_rate": 7.44531202566771e-05,
      "loss": 0.3796,
      "step": 891000
    },
    {
      "epoch": 22.71,
      "learning_rate": 7.368349941466058e-05,
      "loss": 0.383,
      "step": 894000
    },
    {
      "epoch": 22.78,
      "learning_rate": 7.291387857264408e-05,
      "loss": 0.3804,
      "step": 897000
    },
    {
      "epoch": 22.86,
      "learning_rate": 7.214425773062756e-05,
      "loss": 0.3802,
      "step": 900000
    },
    {
      "epoch": 22.93,
      "learning_rate": 7.137463688861106e-05,
      "loss": 0.383,
      "step": 903000
    },
    {
      "epoch": 23.01,
      "learning_rate": 7.060501604659455e-05,
      "loss": 0.3775,
      "step": 906000
    },
    {
      "epoch": 23.09,
      "learning_rate": 6.983539520457805e-05,
      "loss": 0.3663,
      "step": 909000
    },
    {
      "epoch": 23.16,
      "learning_rate": 6.906577436256153e-05,
      "loss": 0.3634,
      "step": 912000
    },
    {
      "epoch": 23.24,
      "learning_rate": 6.829615352054503e-05,
      "loss": 0.3691,
      "step": 915000
    },
    {
      "epoch": 23.31,
      "learning_rate": 6.752653267852851e-05,
      "loss": 0.3721,
      "step": 918000
    },
    {
      "epoch": 23.39,
      "learning_rate": 6.6756911836512e-05,
      "loss": 0.3694,
      "step": 921000
    },
    {
      "epoch": 23.47,
      "learning_rate": 6.59872909944955e-05,
      "loss": 0.371,
      "step": 924000
    },
    {
      "epoch": 23.54,
      "learning_rate": 6.521767015247898e-05,
      "loss": 0.3668,
      "step": 927000
    },
    {
      "epoch": 23.62,
      "learning_rate": 6.444804931046247e-05,
      "loss": 0.3701,
      "step": 930000
    },
    {
      "epoch": 23.7,
      "learning_rate": 6.367842846844597e-05,
      "loss": 0.37,
      "step": 933000
    },
    {
      "epoch": 23.77,
      "learning_rate": 6.290880762642945e-05,
      "loss": 0.3682,
      "step": 936000
    },
    {
      "epoch": 23.85,
      "learning_rate": 6.213918678441295e-05,
      "loss": 0.3698,
      "step": 939000
    },
    {
      "epoch": 23.92,
      "learning_rate": 6.136956594239644e-05,
      "loss": 0.3681,
      "step": 942000
    },
    {
      "epoch": 24.0,
      "learning_rate": 6.0599945100379935e-05,
      "loss": 0.3685,
      "step": 945000
    },
    {
      "epoch": 24.08,
      "learning_rate": 5.983032425836342e-05,
      "loss": 0.3552,
      "step": 948000
    },
    {
      "epoch": 24.15,
      "learning_rate": 5.906070341634691e-05,
      "loss": 0.3553,
      "step": 951000
    },
    {
      "epoch": 24.23,
      "learning_rate": 5.82910825743304e-05,
      "loss": 0.3523,
      "step": 954000
    },
    {
      "epoch": 24.31,
      "learning_rate": 5.7521461732313896e-05,
      "loss": 0.3557,
      "step": 957000
    },
    {
      "epoch": 24.38,
      "learning_rate": 5.675184089029738e-05,
      "loss": 0.3573,
      "step": 960000
    },
    {
      "epoch": 24.46,
      "learning_rate": 5.5982220048280873e-05,
      "loss": 0.357,
      "step": 963000
    },
    {
      "epoch": 24.53,
      "learning_rate": 5.5212599206264365e-05,
      "loss": 0.358,
      "step": 966000
    },
    {
      "epoch": 24.61,
      "learning_rate": 5.444297836424786e-05,
      "loss": 0.3572,
      "step": 969000
    },
    {
      "epoch": 24.69,
      "learning_rate": 5.367335752223134e-05,
      "loss": 0.3589,
      "step": 972000
    },
    {
      "epoch": 24.76,
      "learning_rate": 5.290373668021484e-05,
      "loss": 0.3585,
      "step": 975000
    },
    {
      "epoch": 24.84,
      "learning_rate": 5.213411583819833e-05,
      "loss": 0.3544,
      "step": 978000
    },
    {
      "epoch": 24.91,
      "learning_rate": 5.136449499618182e-05,
      "loss": 0.3542,
      "step": 981000
    },
    {
      "epoch": 24.99,
      "learning_rate": 5.059487415416532e-05,
      "loss": 0.358,
      "step": 984000
    },
    {
      "epoch": 25.07,
      "learning_rate": 4.98252533121488e-05,
      "loss": 0.3488,
      "step": 987000
    },
    {
      "epoch": 25.14,
      "learning_rate": 4.9055632470132294e-05,
      "loss": 0.346,
      "step": 990000
    },
    {
      "epoch": 25.22,
      "learning_rate": 4.8286011628115786e-05,
      "loss": 0.3437,
      "step": 993000
    },
    {
      "epoch": 25.3,
      "learning_rate": 4.751639078609928e-05,
      "loss": 0.3474,
      "step": 996000
    },
    {
      "epoch": 25.37,
      "learning_rate": 4.674676994408276e-05,
      "loss": 0.3452,
      "step": 999000
    },
    {
      "epoch": 25.45,
      "learning_rate": 4.597714910206626e-05,
      "loss": 0.3439,
      "step": 1002000
    },
    {
      "epoch": 25.52,
      "learning_rate": 4.520752826004975e-05,
      "loss": 0.3475,
      "step": 1005000
    },
    {
      "epoch": 25.6,
      "learning_rate": 4.443790741803324e-05,
      "loss": 0.3441,
      "step": 1008000
    },
    {
      "epoch": 25.68,
      "learning_rate": 4.366828657601673e-05,
      "loss": 0.345,
      "step": 1011000
    },
    {
      "epoch": 25.75,
      "learning_rate": 4.289866573400022e-05,
      "loss": 0.3467,
      "step": 1014000
    },
    {
      "epoch": 25.83,
      "learning_rate": 4.212904489198371e-05,
      "loss": 0.3473,
      "step": 1017000
    },
    {
      "epoch": 25.91,
      "learning_rate": 4.135942404996721e-05,
      "loss": 0.3435,
      "step": 1020000
    },
    {
      "epoch": 25.98,
      "learning_rate": 4.058980320795069e-05,
      "loss": 0.3441,
      "step": 1023000
    },
    {
      "epoch": 26.06,
      "learning_rate": 3.9820182365934184e-05,
      "loss": 0.3416,
      "step": 1026000
    },
    {
      "epoch": 26.13,
      "learning_rate": 3.9050561523917676e-05,
      "loss": 0.3348,
      "step": 1029000
    },
    {
      "epoch": 26.21,
      "learning_rate": 3.828094068190117e-05,
      "loss": 0.3382,
      "step": 1032000
    },
    {
      "epoch": 26.29,
      "learning_rate": 3.751131983988465e-05,
      "loss": 0.3383,
      "step": 1035000
    },
    {
      "epoch": 26.36,
      "learning_rate": 3.6741698997868145e-05,
      "loss": 0.3358,
      "step": 1038000
    },
    {
      "epoch": 26.44,
      "learning_rate": 3.597207815585164e-05,
      "loss": 0.3355,
      "step": 1041000
    },
    {
      "epoch": 26.51,
      "learning_rate": 3.520245731383513e-05,
      "loss": 0.3377,
      "step": 1044000
    },
    {
      "epoch": 26.59,
      "learning_rate": 3.443283647181862e-05,
      "loss": 0.3358,
      "step": 1047000
    },
    {
      "epoch": 26.67,
      "learning_rate": 3.366321562980211e-05,
      "loss": 0.336,
      "step": 1050000
    },
    {
      "epoch": 26.74,
      "learning_rate": 3.2893594787785605e-05,
      "loss": 0.336,
      "step": 1053000
    },
    {
      "epoch": 26.82,
      "learning_rate": 3.21239739457691e-05,
      "loss": 0.3329,
      "step": 1056000
    },
    {
      "epoch": 26.9,
      "learning_rate": 3.135435310375258e-05,
      "loss": 0.3364,
      "step": 1059000
    },
    {
      "epoch": 26.97,
      "learning_rate": 3.0584732261736074e-05,
      "loss": 0.3354,
      "step": 1062000
    },
    {
      "epoch": 27.05,
      "learning_rate": 2.9815111419719566e-05,
      "loss": 0.3311,
      "step": 1065000
    },
    {
      "epoch": 27.12,
      "learning_rate": 2.9045490577703058e-05,
      "loss": 0.3287,
      "step": 1068000
    },
    {
      "epoch": 27.2,
      "learning_rate": 2.8275869735686546e-05,
      "loss": 0.3288,
      "step": 1071000
    },
    {
      "epoch": 27.28,
      "learning_rate": 2.7506248893670038e-05,
      "loss": 0.3261,
      "step": 1074000
    },
    {
      "epoch": 27.35,
      "learning_rate": 2.673662805165353e-05,
      "loss": 0.3272,
      "step": 1077000
    },
    {
      "epoch": 27.43,
      "learning_rate": 2.596700720963702e-05,
      "loss": 0.3306,
      "step": 1080000
    },
    {
      "epoch": 27.51,
      "learning_rate": 2.519738636762051e-05,
      "loss": 0.3268,
      "step": 1083000
    },
    {
      "epoch": 27.58,
      "learning_rate": 2.4427765525604003e-05,
      "loss": 0.3256,
      "step": 1086000
    },
    {
      "epoch": 27.66,
      "learning_rate": 2.365814468358749e-05,
      "loss": 0.325,
      "step": 1089000
    },
    {
      "epoch": 27.73,
      "learning_rate": 2.2888523841570983e-05,
      "loss": 0.3258,
      "step": 1092000
    },
    {
      "epoch": 27.81,
      "learning_rate": 2.2118902999554475e-05,
      "loss": 0.3249,
      "step": 1095000
    },
    {
      "epoch": 27.89,
      "learning_rate": 2.1349282157537964e-05,
      "loss": 0.3272,
      "step": 1098000
    },
    {
      "epoch": 27.96,
      "learning_rate": 2.0579661315521456e-05,
      "loss": 0.3233,
      "step": 1101000
    },
    {
      "epoch": 28.04,
      "learning_rate": 1.9810040473504944e-05,
      "loss": 0.3239,
      "step": 1104000
    },
    {
      "epoch": 28.11,
      "learning_rate": 1.9040419631488436e-05,
      "loss": 0.3223,
      "step": 1107000
    },
    {
      "epoch": 28.19,
      "learning_rate": 1.8270798789471928e-05,
      "loss": 0.3188,
      "step": 1110000
    },
    {
      "epoch": 28.27,
      "learning_rate": 1.7501177947455417e-05,
      "loss": 0.3191,
      "step": 1113000
    },
    {
      "epoch": 28.34,
      "learning_rate": 1.673155710543891e-05,
      "loss": 0.3201,
      "step": 1116000
    },
    {
      "epoch": 28.42,
      "learning_rate": 1.59619362634224e-05,
      "loss": 0.32,
      "step": 1119000
    },
    {
      "epoch": 28.5,
      "learning_rate": 1.519231542140589e-05,
      "loss": 0.318,
      "step": 1122000
    },
    {
      "epoch": 28.57,
      "learning_rate": 1.4422694579389381e-05,
      "loss": 0.3171,
      "step": 1125000
    },
    {
      "epoch": 28.65,
      "learning_rate": 1.3653073737372871e-05,
      "loss": 0.3195,
      "step": 1128000
    },
    {
      "epoch": 28.72,
      "learning_rate": 1.2883452895356363e-05,
      "loss": 0.3199,
      "step": 1131000
    },
    {
      "epoch": 28.8,
      "learning_rate": 1.2113832053339853e-05,
      "loss": 0.3201,
      "step": 1134000
    },
    {
      "epoch": 28.88,
      "learning_rate": 1.1344211211323345e-05,
      "loss": 0.3207,
      "step": 1137000
    },
    {
      "epoch": 28.95,
      "learning_rate": 1.0574590369306837e-05,
      "loss": 0.3172,
      "step": 1140000
    },
    {
      "epoch": 29.03,
      "learning_rate": 9.804969527290328e-06,
      "loss": 0.3143,
      "step": 1143000
    },
    {
      "epoch": 29.11,
      "learning_rate": 9.035348685273818e-06,
      "loss": 0.3172,
      "step": 1146000
    },
    {
      "epoch": 29.18,
      "learning_rate": 8.265727843257308e-06,
      "loss": 0.3168,
      "step": 1149000
    },
    {
      "epoch": 29.26,
      "learning_rate": 7.4961070012408e-06,
      "loss": 0.3138,
      "step": 1152000
    },
    {
      "epoch": 29.33,
      "learning_rate": 6.72648615922429e-06,
      "loss": 0.3136,
      "step": 1155000
    },
    {
      "epoch": 29.41,
      "learning_rate": 5.956865317207781e-06,
      "loss": 0.317,
      "step": 1158000
    },
    {
      "epoch": 29.49,
      "learning_rate": 5.187244475191272e-06,
      "loss": 0.316,
      "step": 1161000
    },
    {
      "epoch": 29.56,
      "learning_rate": 4.417623633174763e-06,
      "loss": 0.3103,
      "step": 1164000
    },
    {
      "epoch": 29.64,
      "learning_rate": 3.6480027911582534e-06,
      "loss": 0.3164,
      "step": 1167000
    },
    {
      "epoch": 29.72,
      "learning_rate": 2.878381949141744e-06,
      "loss": 0.315,
      "step": 1170000
    },
    {
      "epoch": 29.79,
      "learning_rate": 2.108761107125235e-06,
      "loss": 0.3147,
      "step": 1173000
    },
    {
      "epoch": 29.87,
      "learning_rate": 1.3391402651087259e-06,
      "loss": 0.3113,
      "step": 1176000
    },
    {
      "epoch": 29.94,
      "learning_rate": 5.695194230922169e-07,
      "loss": 0.3131,
      "step": 1179000
    },
    {
      "epoch": 30.0,
      "step": 1181220,
      "total_flos": 2.398747739778959e+21,
      "train_loss": 0.5846844887318071,
      "train_runtime": 684864.5478,
      "train_samples_per_second": 27.596,
      "train_steps_per_second": 1.725
    }
  ],
  "logging_steps": 3000,
  "max_steps": 1181220,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 2.398747739778959e+21,
  "trial_name": null,
  "trial_params": null
}