{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.999701550445437,
"eval_steps": 500,
"global_step": 670120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 0.0001342882721575649,
"loss": 12.1916,
"step": 3000
},
{
"epoch": 0.18,
"learning_rate": 0.0002685765443151298,
"loss": 2.3149,
"step": 6000
},
{
"epoch": 0.27,
"learning_rate": 0.00029896083615458125,
"loss": 1.9954,
"step": 9000
},
{
"epoch": 0.36,
"learning_rate": 0.00029760422539032704,
"loss": 1.8429,
"step": 12000
},
{
"epoch": 0.45,
"learning_rate": 0.0002962476146260728,
"loss": 1.7657,
"step": 15000
},
{
"epoch": 0.54,
"learning_rate": 0.0002948910038618186,
"loss": 1.6944,
"step": 18000
},
{
"epoch": 0.63,
"learning_rate": 0.0002935343930975644,
"loss": 1.6484,
"step": 21000
},
{
"epoch": 0.72,
"learning_rate": 0.0002921777823333102,
"loss": 1.6187,
"step": 24000
},
{
"epoch": 0.81,
"learning_rate": 0.00029082117156905597,
"loss": 1.5982,
"step": 27000
},
{
"epoch": 0.9,
"learning_rate": 0.00028946456080480176,
"loss": 1.5673,
"step": 30000
},
{
"epoch": 0.98,
"learning_rate": 0.00028810795004054755,
"loss": 1.5474,
"step": 33000
},
{
"epoch": 1.07,
"learning_rate": 0.0002867513392762934,
"loss": 1.4948,
"step": 36000
},
{
"epoch": 1.16,
"learning_rate": 0.0002853947285120391,
"loss": 1.4913,
"step": 39000
},
{
"epoch": 1.25,
"learning_rate": 0.00028403811774778496,
"loss": 1.4821,
"step": 42000
},
{
"epoch": 1.34,
"learning_rate": 0.0002826815069835307,
"loss": 1.4474,
"step": 45000
},
{
"epoch": 1.43,
"learning_rate": 0.00028132489621927653,
"loss": 1.4602,
"step": 48000
},
{
"epoch": 1.52,
"learning_rate": 0.00027996828545502226,
"loss": 1.456,
"step": 51000
},
{
"epoch": 1.61,
"learning_rate": 0.0002786116746907681,
"loss": 1.4377,
"step": 54000
},
{
"epoch": 1.7,
"learning_rate": 0.0002772550639265139,
"loss": 1.4301,
"step": 57000
},
{
"epoch": 1.79,
"learning_rate": 0.0002758984531622597,
"loss": 1.425,
"step": 60000
},
{
"epoch": 1.88,
"learning_rate": 0.00027454184239800546,
"loss": 1.4153,
"step": 63000
},
{
"epoch": 1.97,
"learning_rate": 0.00027318523163375125,
"loss": 1.4046,
"step": 66000
},
{
"epoch": 2.06,
"learning_rate": 0.00027182862086949704,
"loss": 1.3565,
"step": 69000
},
{
"epoch": 2.15,
"learning_rate": 0.0002704720101052428,
"loss": 1.3488,
"step": 72000
},
{
"epoch": 2.24,
"learning_rate": 0.0002691153993409886,
"loss": 1.339,
"step": 75000
},
{
"epoch": 2.33,
"learning_rate": 0.0002677587885767344,
"loss": 1.3513,
"step": 78000
},
{
"epoch": 2.42,
"learning_rate": 0.0002664021778124802,
"loss": 1.3491,
"step": 81000
},
{
"epoch": 2.51,
"learning_rate": 0.00026504556704822597,
"loss": 1.3345,
"step": 84000
},
{
"epoch": 2.6,
"learning_rate": 0.00026368895628397175,
"loss": 1.3313,
"step": 87000
},
{
"epoch": 2.69,
"learning_rate": 0.00026233234551971754,
"loss": 1.3319,
"step": 90000
},
{
"epoch": 2.78,
"learning_rate": 0.00026097573475546333,
"loss": 1.3185,
"step": 93000
},
{
"epoch": 2.87,
"learning_rate": 0.00025961912399120917,
"loss": 1.325,
"step": 96000
},
{
"epoch": 2.95,
"learning_rate": 0.0002582625132269549,
"loss": 1.3048,
"step": 99000
},
{
"epoch": 3.04,
"learning_rate": 0.00025690590246270074,
"loss": 1.2797,
"step": 102000
},
{
"epoch": 3.13,
"learning_rate": 0.00025554929169844653,
"loss": 1.2646,
"step": 105000
},
{
"epoch": 3.22,
"learning_rate": 0.0002541926809341923,
"loss": 1.2595,
"step": 108000
},
{
"epoch": 3.31,
"learning_rate": 0.0002528360701699381,
"loss": 1.2574,
"step": 111000
},
{
"epoch": 3.4,
"learning_rate": 0.0002514794594056839,
"loss": 1.2623,
"step": 114000
},
{
"epoch": 3.49,
"learning_rate": 0.0002501228486414297,
"loss": 1.2548,
"step": 117000
},
{
"epoch": 3.58,
"learning_rate": 0.00024876623787717546,
"loss": 1.2615,
"step": 120000
},
{
"epoch": 3.67,
"learning_rate": 0.00024740962711292125,
"loss": 1.2558,
"step": 123000
},
{
"epoch": 3.76,
"learning_rate": 0.00024605301634866703,
"loss": 1.2537,
"step": 126000
},
{
"epoch": 3.85,
"learning_rate": 0.0002446964055844128,
"loss": 1.2393,
"step": 129000
},
{
"epoch": 3.94,
"learning_rate": 0.0002433397948201586,
"loss": 1.2438,
"step": 132000
},
{
"epoch": 4.03,
"learning_rate": 0.00024198318405590442,
"loss": 1.2277,
"step": 135000
},
{
"epoch": 4.12,
"learning_rate": 0.00024062657329165018,
"loss": 1.1934,
"step": 138000
},
{
"epoch": 4.21,
"learning_rate": 0.000239269962527396,
"loss": 1.1857,
"step": 141000
},
{
"epoch": 4.3,
"learning_rate": 0.00023791335176314175,
"loss": 1.1902,
"step": 144000
},
{
"epoch": 4.39,
"learning_rate": 0.00023655674099888756,
"loss": 1.1901,
"step": 147000
},
{
"epoch": 4.48,
"learning_rate": 0.00023520013023463332,
"loss": 1.1948,
"step": 150000
},
{
"epoch": 4.57,
"learning_rate": 0.00023384351947037914,
"loss": 1.1941,
"step": 153000
},
{
"epoch": 4.66,
"learning_rate": 0.00023248690870612495,
"loss": 1.1903,
"step": 156000
},
{
"epoch": 4.75,
"learning_rate": 0.0002311302979418707,
"loss": 1.1877,
"step": 159000
},
{
"epoch": 4.83,
"learning_rate": 0.00022977368717761652,
"loss": 1.1974,
"step": 162000
},
{
"epoch": 4.92,
"learning_rate": 0.00022841707641336228,
"loss": 1.189,
"step": 165000
},
{
"epoch": 5.01,
"learning_rate": 0.0002270604656491081,
"loss": 1.1851,
"step": 168000
},
{
"epoch": 5.1,
"learning_rate": 0.00022570385488485386,
"loss": 1.1479,
"step": 171000
},
{
"epoch": 5.19,
"learning_rate": 0.00022434724412059967,
"loss": 1.1374,
"step": 174000
},
{
"epoch": 5.28,
"learning_rate": 0.00022299063335634543,
"loss": 1.1343,
"step": 177000
},
{
"epoch": 5.37,
"learning_rate": 0.00022163402259209124,
"loss": 1.1306,
"step": 180000
},
{
"epoch": 5.46,
"learning_rate": 0.00022027741182783706,
"loss": 1.1399,
"step": 183000
},
{
"epoch": 5.55,
"learning_rate": 0.00021892080106358282,
"loss": 1.1457,
"step": 186000
},
{
"epoch": 5.64,
"learning_rate": 0.00021756419029932863,
"loss": 1.1469,
"step": 189000
},
{
"epoch": 5.73,
"learning_rate": 0.0002162075795350744,
"loss": 1.1448,
"step": 192000
},
{
"epoch": 5.82,
"learning_rate": 0.0002148509687708202,
"loss": 1.1397,
"step": 195000
},
{
"epoch": 5.91,
"learning_rate": 0.00021349435800656596,
"loss": 1.1441,
"step": 198000
},
{
"epoch": 6.0,
"learning_rate": 0.00021213774724231177,
"loss": 1.1453,
"step": 201000
},
{
"epoch": 6.09,
"learning_rate": 0.00021078113647805753,
"loss": 1.0897,
"step": 204000
},
{
"epoch": 6.18,
"learning_rate": 0.00020942452571380335,
"loss": 1.0956,
"step": 207000
},
{
"epoch": 6.27,
"learning_rate": 0.0002080679149495491,
"loss": 1.0947,
"step": 210000
},
{
"epoch": 6.36,
"learning_rate": 0.00020671130418529492,
"loss": 1.0961,
"step": 213000
},
{
"epoch": 6.45,
"learning_rate": 0.00020535469342104073,
"loss": 1.1117,
"step": 216000
},
{
"epoch": 6.54,
"learning_rate": 0.0002039980826567865,
"loss": 1.1032,
"step": 219000
},
{
"epoch": 6.63,
"learning_rate": 0.0002026414718925323,
"loss": 1.0983,
"step": 222000
},
{
"epoch": 6.72,
"learning_rate": 0.00020128486112827807,
"loss": 1.0885,
"step": 225000
},
{
"epoch": 6.8,
"learning_rate": 0.00019992825036402388,
"loss": 1.0867,
"step": 228000
},
{
"epoch": 6.89,
"learning_rate": 0.00019857163959976964,
"loss": 1.0993,
"step": 231000
},
{
"epoch": 6.98,
"learning_rate": 0.00019721502883551545,
"loss": 1.1021,
"step": 234000
},
{
"epoch": 7.07,
"learning_rate": 0.00019585841807126124,
"loss": 1.0519,
"step": 237000
},
{
"epoch": 7.16,
"learning_rate": 0.00019450180730700702,
"loss": 1.0594,
"step": 240000
},
{
"epoch": 7.25,
"learning_rate": 0.00019314519654275284,
"loss": 1.0555,
"step": 243000
},
{
"epoch": 7.34,
"learning_rate": 0.0001917885857784986,
"loss": 1.057,
"step": 246000
},
{
"epoch": 7.43,
"learning_rate": 0.0001904319750142444,
"loss": 1.0585,
"step": 249000
},
{
"epoch": 7.52,
"learning_rate": 0.00018907536424999017,
"loss": 1.0534,
"step": 252000
},
{
"epoch": 7.61,
"learning_rate": 0.00018771875348573598,
"loss": 1.0655,
"step": 255000
},
{
"epoch": 7.7,
"learning_rate": 0.00018636214272148174,
"loss": 1.056,
"step": 258000
},
{
"epoch": 7.79,
"learning_rate": 0.00018500553195722756,
"loss": 1.0638,
"step": 261000
},
{
"epoch": 7.88,
"learning_rate": 0.00018364892119297334,
"loss": 1.0521,
"step": 264000
},
{
"epoch": 7.97,
"learning_rate": 0.00018229231042871913,
"loss": 1.0633,
"step": 267000
},
{
"epoch": 8.06,
"learning_rate": 0.00018093569966446492,
"loss": 1.0345,
"step": 270000
},
{
"epoch": 8.15,
"learning_rate": 0.0001795790889002107,
"loss": 1.0186,
"step": 273000
},
{
"epoch": 8.24,
"learning_rate": 0.00017822247813595652,
"loss": 1.0141,
"step": 276000
},
{
"epoch": 8.33,
"learning_rate": 0.00017686586737170228,
"loss": 1.0184,
"step": 279000
},
{
"epoch": 8.42,
"learning_rate": 0.0001755092566074481,
"loss": 1.0184,
"step": 282000
},
{
"epoch": 8.51,
"learning_rate": 0.00017415264584319385,
"loss": 1.0222,
"step": 285000
},
{
"epoch": 8.6,
"learning_rate": 0.00017279603507893966,
"loss": 1.0176,
"step": 288000
},
{
"epoch": 8.68,
"learning_rate": 0.00017143942431468545,
"loss": 1.0236,
"step": 291000
},
{
"epoch": 8.77,
"learning_rate": 0.00017008281355043123,
"loss": 1.0278,
"step": 294000
},
{
"epoch": 8.86,
"learning_rate": 0.00016872620278617702,
"loss": 1.0076,
"step": 297000
},
{
"epoch": 8.95,
"learning_rate": 0.0001673695920219228,
"loss": 1.0248,
"step": 300000
},
{
"epoch": 9.04,
"learning_rate": 0.00016601298125766862,
"loss": 0.9915,
"step": 303000
},
{
"epoch": 9.13,
"learning_rate": 0.00016465637049341438,
"loss": 0.9759,
"step": 306000
},
{
"epoch": 9.22,
"learning_rate": 0.0001632997597291602,
"loss": 0.9813,
"step": 309000
},
{
"epoch": 9.31,
"learning_rate": 0.00016194314896490595,
"loss": 0.9853,
"step": 312000
},
{
"epoch": 9.4,
"learning_rate": 0.00016058653820065177,
"loss": 0.9808,
"step": 315000
},
{
"epoch": 9.49,
"learning_rate": 0.00015922992743639755,
"loss": 0.9759,
"step": 318000
},
{
"epoch": 9.58,
"learning_rate": 0.00015787331667214334,
"loss": 0.9852,
"step": 321000
},
{
"epoch": 9.67,
"learning_rate": 0.00015651670590788913,
"loss": 0.9796,
"step": 324000
},
{
"epoch": 9.76,
"learning_rate": 0.0001551600951436349,
"loss": 0.9871,
"step": 327000
},
{
"epoch": 9.85,
"learning_rate": 0.0001538034843793807,
"loss": 0.9953,
"step": 330000
},
{
"epoch": 9.94,
"learning_rate": 0.00015244687361512649,
"loss": 0.9883,
"step": 333000
},
{
"epoch": 10.03,
"learning_rate": 0.0001510902628508723,
"loss": 0.9735,
"step": 336000
},
{
"epoch": 10.12,
"learning_rate": 0.00014973365208661809,
"loss": 0.9509,
"step": 339000
},
{
"epoch": 10.21,
"learning_rate": 0.00014837704132236387,
"loss": 0.9448,
"step": 342000
},
{
"epoch": 10.3,
"learning_rate": 0.00014702043055810966,
"loss": 0.9395,
"step": 345000
},
{
"epoch": 10.39,
"learning_rate": 0.00014566381979385544,
"loss": 0.9438,
"step": 348000
},
{
"epoch": 10.48,
"learning_rate": 0.00014430720902960123,
"loss": 0.9498,
"step": 351000
},
{
"epoch": 10.57,
"learning_rate": 0.00014295059826534702,
"loss": 0.9481,
"step": 354000
},
{
"epoch": 10.65,
"learning_rate": 0.0001415939875010928,
"loss": 0.9509,
"step": 357000
},
{
"epoch": 10.74,
"learning_rate": 0.0001402373767368386,
"loss": 0.9527,
"step": 360000
},
{
"epoch": 10.83,
"learning_rate": 0.0001388807659725844,
"loss": 0.944,
"step": 363000
},
{
"epoch": 10.92,
"learning_rate": 0.0001375241552083302,
"loss": 0.9427,
"step": 366000
},
{
"epoch": 11.01,
"learning_rate": 0.00013616754444407598,
"loss": 0.9511,
"step": 369000
},
{
"epoch": 11.1,
"learning_rate": 0.00013481093367982176,
"loss": 0.901,
"step": 372000
},
{
"epoch": 11.19,
"learning_rate": 0.00013345432291556755,
"loss": 0.9175,
"step": 375000
},
{
"epoch": 11.28,
"learning_rate": 0.00013209771215131334,
"loss": 0.9061,
"step": 378000
},
{
"epoch": 11.37,
"learning_rate": 0.00013074110138705912,
"loss": 0.9175,
"step": 381000
},
{
"epoch": 11.46,
"learning_rate": 0.0001293844906228049,
"loss": 0.9175,
"step": 384000
},
{
"epoch": 11.55,
"learning_rate": 0.0001280278798585507,
"loss": 0.9149,
"step": 387000
},
{
"epoch": 11.64,
"learning_rate": 0.0001266712690942965,
"loss": 0.9155,
"step": 390000
},
{
"epoch": 11.73,
"learning_rate": 0.0001253146583300423,
"loss": 0.9129,
"step": 393000
},
{
"epoch": 11.82,
"learning_rate": 0.00012395804756578808,
"loss": 0.9178,
"step": 396000
},
{
"epoch": 11.91,
"learning_rate": 0.00012260143680153387,
"loss": 0.912,
"step": 399000
},
{
"epoch": 12.0,
"learning_rate": 0.00012124482603727964,
"loss": 0.9217,
"step": 402000
},
{
"epoch": 12.09,
"learning_rate": 0.00011988821527302545,
"loss": 0.8778,
"step": 405000
},
{
"epoch": 12.18,
"learning_rate": 0.00011853160450877124,
"loss": 0.8741,
"step": 408000
},
{
"epoch": 12.27,
"learning_rate": 0.00011717499374451703,
"loss": 0.8786,
"step": 411000
},
{
"epoch": 12.36,
"learning_rate": 0.00011581838298026281,
"loss": 0.8837,
"step": 414000
},
{
"epoch": 12.45,
"learning_rate": 0.0001144617722160086,
"loss": 0.883,
"step": 417000
},
{
"epoch": 12.53,
"learning_rate": 0.00011310516145175439,
"loss": 0.8764,
"step": 420000
},
{
"epoch": 12.62,
"learning_rate": 0.00011174855068750017,
"loss": 0.8881,
"step": 423000
},
{
"epoch": 12.71,
"learning_rate": 0.00011039193992324596,
"loss": 0.8844,
"step": 426000
},
{
"epoch": 12.8,
"learning_rate": 0.00010903532915899175,
"loss": 0.8838,
"step": 429000
},
{
"epoch": 12.89,
"learning_rate": 0.00010767871839473755,
"loss": 0.8799,
"step": 432000
},
{
"epoch": 12.98,
"learning_rate": 0.00010632210763048335,
"loss": 0.8766,
"step": 435000
},
{
"epoch": 13.07,
"learning_rate": 0.00010496549686622913,
"loss": 0.8562,
"step": 438000
},
{
"epoch": 13.16,
"learning_rate": 0.00010360888610197492,
"loss": 0.8445,
"step": 441000
},
{
"epoch": 13.25,
"learning_rate": 0.0001022522753377207,
"loss": 0.8422,
"step": 444000
},
{
"epoch": 13.34,
"learning_rate": 0.00010089566457346649,
"loss": 0.8405,
"step": 447000
},
{
"epoch": 13.43,
"learning_rate": 9.953905380921228e-05,
"loss": 0.8456,
"step": 450000
},
{
"epoch": 13.52,
"learning_rate": 9.818244304495806e-05,
"loss": 0.8516,
"step": 453000
},
{
"epoch": 13.61,
"learning_rate": 9.682583228070386e-05,
"loss": 0.8514,
"step": 456000
},
{
"epoch": 13.7,
"learning_rate": 9.546922151644965e-05,
"loss": 0.8465,
"step": 459000
},
{
"epoch": 13.79,
"learning_rate": 9.411261075219544e-05,
"loss": 0.8499,
"step": 462000
},
{
"epoch": 13.88,
"learning_rate": 9.275599998794124e-05,
"loss": 0.8582,
"step": 465000
},
{
"epoch": 13.97,
"learning_rate": 9.139938922368702e-05,
"loss": 0.8544,
"step": 468000
},
{
"epoch": 14.06,
"learning_rate": 9.004277845943281e-05,
"loss": 0.8226,
"step": 471000
},
{
"epoch": 14.15,
"learning_rate": 8.86861676951786e-05,
"loss": 0.8132,
"step": 474000
},
{
"epoch": 14.24,
"learning_rate": 8.732955693092438e-05,
"loss": 0.8196,
"step": 477000
},
{
"epoch": 14.33,
"learning_rate": 8.597294616667018e-05,
"loss": 0.8221,
"step": 480000
},
{
"epoch": 14.42,
"learning_rate": 8.461633540241597e-05,
"loss": 0.8155,
"step": 483000
},
{
"epoch": 14.5,
"learning_rate": 8.325972463816176e-05,
"loss": 0.8219,
"step": 486000
},
{
"epoch": 14.59,
"learning_rate": 8.190311387390754e-05,
"loss": 0.8171,
"step": 489000
},
{
"epoch": 14.68,
"learning_rate": 8.054650310965333e-05,
"loss": 0.8116,
"step": 492000
},
{
"epoch": 14.77,
"learning_rate": 7.918989234539913e-05,
"loss": 0.8213,
"step": 495000
},
{
"epoch": 14.86,
"learning_rate": 7.783328158114491e-05,
"loss": 0.8154,
"step": 498000
},
{
"epoch": 14.95,
"learning_rate": 7.64766708168907e-05,
"loss": 0.824,
"step": 501000
},
{
"epoch": 15.04,
"learning_rate": 7.512006005263649e-05,
"loss": 0.8068,
"step": 504000
},
{
"epoch": 15.13,
"learning_rate": 7.376344928838229e-05,
"loss": 0.7813,
"step": 507000
},
{
"epoch": 15.22,
"learning_rate": 7.240683852412807e-05,
"loss": 0.7947,
"step": 510000
},
{
"epoch": 15.31,
"learning_rate": 7.105022775987386e-05,
"loss": 0.7899,
"step": 513000
},
{
"epoch": 15.4,
"learning_rate": 6.969361699561965e-05,
"loss": 0.7885,
"step": 516000
},
{
"epoch": 15.49,
"learning_rate": 6.833700623136545e-05,
"loss": 0.7963,
"step": 519000
},
{
"epoch": 15.58,
"learning_rate": 6.698039546711123e-05,
"loss": 0.787,
"step": 522000
},
{
"epoch": 15.67,
"learning_rate": 6.562378470285702e-05,
"loss": 0.7877,
"step": 525000
},
{
"epoch": 15.76,
"learning_rate": 6.42671739386028e-05,
"loss": 0.7949,
"step": 528000
},
{
"epoch": 15.85,
"learning_rate": 6.291056317434859e-05,
"loss": 0.7835,
"step": 531000
},
{
"epoch": 15.94,
"learning_rate": 6.155395241009439e-05,
"loss": 0.7904,
"step": 534000
},
{
"epoch": 16.03,
"learning_rate": 6.019734164584017e-05,
"loss": 0.7797,
"step": 537000
},
{
"epoch": 16.12,
"learning_rate": 5.8840730881585965e-05,
"loss": 0.7606,
"step": 540000
},
{
"epoch": 16.21,
"learning_rate": 5.748412011733175e-05,
"loss": 0.7671,
"step": 543000
},
{
"epoch": 16.3,
"learning_rate": 5.6127509353077545e-05,
"loss": 0.764,
"step": 546000
},
{
"epoch": 16.38,
"learning_rate": 5.477089858882333e-05,
"loss": 0.758,
"step": 549000
},
{
"epoch": 16.47,
"learning_rate": 5.3414287824569124e-05,
"loss": 0.7518,
"step": 552000
},
{
"epoch": 16.56,
"learning_rate": 5.205767706031491e-05,
"loss": 0.7644,
"step": 555000
},
{
"epoch": 16.65,
"learning_rate": 5.07010662960607e-05,
"loss": 0.7577,
"step": 558000
},
{
"epoch": 16.74,
"learning_rate": 4.934445553180649e-05,
"loss": 0.762,
"step": 561000
},
{
"epoch": 16.83,
"learning_rate": 4.7987844767552283e-05,
"loss": 0.7548,
"step": 564000
},
{
"epoch": 16.92,
"learning_rate": 4.663123400329807e-05,
"loss": 0.7567,
"step": 567000
},
{
"epoch": 17.01,
"learning_rate": 4.5274623239043856e-05,
"loss": 0.7613,
"step": 570000
},
{
"epoch": 17.1,
"learning_rate": 4.391801247478964e-05,
"loss": 0.7346,
"step": 573000
},
{
"epoch": 17.19,
"learning_rate": 4.256140171053544e-05,
"loss": 0.7323,
"step": 576000
},
{
"epoch": 17.28,
"learning_rate": 4.120479094628123e-05,
"loss": 0.7322,
"step": 579000
},
{
"epoch": 17.37,
"learning_rate": 3.9848180182027016e-05,
"loss": 0.7456,
"step": 582000
},
{
"epoch": 17.46,
"learning_rate": 3.84915694177728e-05,
"loss": 0.7324,
"step": 585000
},
{
"epoch": 17.55,
"learning_rate": 3.7134958653518595e-05,
"loss": 0.7414,
"step": 588000
},
{
"epoch": 17.64,
"learning_rate": 3.577834788926438e-05,
"loss": 0.7334,
"step": 591000
},
{
"epoch": 17.73,
"learning_rate": 3.442173712501017e-05,
"loss": 0.731,
"step": 594000
},
{
"epoch": 17.82,
"learning_rate": 3.306512636075596e-05,
"loss": 0.7488,
"step": 597000
},
{
"epoch": 17.91,
"learning_rate": 3.170851559650175e-05,
"loss": 0.7287,
"step": 600000
},
{
"epoch": 18.0,
"learning_rate": 3.035190483224754e-05,
"loss": 0.7361,
"step": 603000
},
{
"epoch": 18.09,
"learning_rate": 2.8995294067993327e-05,
"loss": 0.7173,
"step": 606000
},
{
"epoch": 18.18,
"learning_rate": 2.763868330373912e-05,
"loss": 0.7173,
"step": 609000
},
{
"epoch": 18.27,
"learning_rate": 2.6282072539484907e-05,
"loss": 0.7138,
"step": 612000
},
{
"epoch": 18.35,
"learning_rate": 2.4925461775230697e-05,
"loss": 0.71,
"step": 615000
},
{
"epoch": 18.44,
"learning_rate": 2.3568851010976486e-05,
"loss": 0.7128,
"step": 618000
},
{
"epoch": 18.53,
"learning_rate": 2.2212240246722276e-05,
"loss": 0.7168,
"step": 621000
},
{
"epoch": 18.62,
"learning_rate": 2.0855629482468066e-05,
"loss": 0.7184,
"step": 624000
},
{
"epoch": 18.71,
"learning_rate": 1.9499018718213856e-05,
"loss": 0.7099,
"step": 627000
},
{
"epoch": 18.8,
"learning_rate": 1.8142407953959646e-05,
"loss": 0.7047,
"step": 630000
},
{
"epoch": 18.89,
"learning_rate": 1.6785797189705435e-05,
"loss": 0.7131,
"step": 633000
},
{
"epoch": 18.98,
"learning_rate": 1.5429186425451222e-05,
"loss": 0.7151,
"step": 636000
},
{
"epoch": 19.07,
"learning_rate": 1.4072575661197012e-05,
"loss": 0.7058,
"step": 639000
},
{
"epoch": 19.16,
"learning_rate": 1.2715964896942801e-05,
"loss": 0.6982,
"step": 642000
},
{
"epoch": 19.25,
"learning_rate": 1.1359354132688591e-05,
"loss": 0.6983,
"step": 645000
},
{
"epoch": 19.34,
"learning_rate": 1.0002743368434381e-05,
"loss": 0.6932,
"step": 648000
},
{
"epoch": 19.43,
"learning_rate": 8.64613260418017e-06,
"loss": 0.7025,
"step": 651000
},
{
"epoch": 19.52,
"learning_rate": 7.289521839925958e-06,
"loss": 0.6945,
"step": 654000
},
{
"epoch": 19.61,
"learning_rate": 5.932911075671748e-06,
"loss": 0.7039,
"step": 657000
},
{
"epoch": 19.7,
"learning_rate": 4.576300311417537e-06,
"loss": 0.6921,
"step": 660000
},
{
"epoch": 19.79,
"learning_rate": 3.2196895471633263e-06,
"loss": 0.6914,
"step": 663000
},
{
"epoch": 19.88,
"learning_rate": 1.863078782909116e-06,
"loss": 0.6969,
"step": 666000
},
{
"epoch": 19.97,
"learning_rate": 5.064680186549053e-07,
"loss": 0.6948,
"step": 669000
},
{
"epoch": 20.0,
"step": 670120,
"total_flos": 1.67362005330918e+21,
"train_loss": 1.0658713307571661,
"train_runtime": 426567.9926,
"train_samples_per_second": 25.136,
"train_steps_per_second": 1.571
}
],
"logging_steps": 3000,
"max_steps": 670120,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.67362005330918e+21,
"trial_name": null,
"trial_params": null
}