{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 28.999988422039806,
"eval_steps": 500,
"global_step": 1252379,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 4.6310589688175354e-05,
"loss": 4.5055,
"step": 2000
},
{
"epoch": 0.09,
"learning_rate": 9.262117937635071e-05,
"loss": 1.4851,
"step": 4000
},
{
"epoch": 0.14,
"learning_rate": 0.00013893176906452608,
"loss": 1.1808,
"step": 6000
},
{
"epoch": 0.19,
"learning_rate": 0.00018524235875270141,
"loss": 1.132,
"step": 8000
},
{
"epoch": 0.23,
"learning_rate": 0.0002315529484408768,
"loss": 1.1349,
"step": 10000
},
{
"epoch": 0.28,
"learning_rate": 0.00027786353812905216,
"loss": 1.1422,
"step": 12000
},
{
"epoch": 0.32,
"learning_rate": 0.0002997558073716234,
"loss": 1.1513,
"step": 14000
},
{
"epoch": 0.37,
"learning_rate": 0.0002992880054015534,
"loss": 1.142,
"step": 16000
},
{
"epoch": 0.42,
"learning_rate": 0.00029882020343148334,
"loss": 1.1303,
"step": 18000
},
{
"epoch": 0.46,
"learning_rate": 0.0002983524014614133,
"loss": 1.1215,
"step": 20000
},
{
"epoch": 0.51,
"learning_rate": 0.0002978845994913433,
"loss": 1.109,
"step": 22000
},
{
"epoch": 0.56,
"learning_rate": 0.00029741679752127325,
"loss": 1.1034,
"step": 24000
},
{
"epoch": 0.6,
"learning_rate": 0.0002969489955512032,
"loss": 1.0998,
"step": 26000
},
{
"epoch": 0.65,
"learning_rate": 0.00029648119358113324,
"loss": 1.0979,
"step": 28000
},
{
"epoch": 0.69,
"learning_rate": 0.00029601339161106316,
"loss": 1.083,
"step": 30000
},
{
"epoch": 0.74,
"learning_rate": 0.0002955455896409932,
"loss": 1.0741,
"step": 32000
},
{
"epoch": 0.79,
"learning_rate": 0.0002950777876709231,
"loss": 1.0741,
"step": 34000
},
{
"epoch": 0.83,
"learning_rate": 0.0002946099857008531,
"loss": 1.0707,
"step": 36000
},
{
"epoch": 0.88,
"learning_rate": 0.00029414218373078303,
"loss": 1.0689,
"step": 38000
},
{
"epoch": 0.93,
"learning_rate": 0.00029367438176071306,
"loss": 1.0536,
"step": 40000
},
{
"epoch": 0.97,
"learning_rate": 0.00029320657979064297,
"loss": 1.0556,
"step": 42000
},
{
"epoch": 1.02,
"learning_rate": 0.000292738777820573,
"loss": 1.0358,
"step": 44000
},
{
"epoch": 1.07,
"learning_rate": 0.0002922709758505029,
"loss": 1.018,
"step": 46000
},
{
"epoch": 1.11,
"learning_rate": 0.00029180317388043293,
"loss": 1.0162,
"step": 48000
},
{
"epoch": 1.16,
"learning_rate": 0.00029133537191036285,
"loss": 1.017,
"step": 50000
},
{
"epoch": 1.2,
"learning_rate": 0.00029086756994029287,
"loss": 1.0145,
"step": 52000
},
{
"epoch": 1.25,
"learning_rate": 0.0002903997679702228,
"loss": 1.0103,
"step": 54000
},
{
"epoch": 1.3,
"learning_rate": 0.0002899319660001528,
"loss": 1.0046,
"step": 56000
},
{
"epoch": 1.34,
"learning_rate": 0.0002894641640300827,
"loss": 1.01,
"step": 58000
},
{
"epoch": 1.39,
"learning_rate": 0.00028899636206001274,
"loss": 1.0146,
"step": 60000
},
{
"epoch": 1.44,
"learning_rate": 0.00028852856008994266,
"loss": 1.0122,
"step": 62000
},
{
"epoch": 1.48,
"learning_rate": 0.0002880607581198727,
"loss": 0.9961,
"step": 64000
},
{
"epoch": 1.53,
"learning_rate": 0.00028759295614980265,
"loss": 0.9914,
"step": 66000
},
{
"epoch": 1.57,
"learning_rate": 0.0002871251541797326,
"loss": 0.9936,
"step": 68000
},
{
"epoch": 1.62,
"learning_rate": 0.0002866573522096626,
"loss": 0.9998,
"step": 70000
},
{
"epoch": 1.67,
"learning_rate": 0.00028618955023959256,
"loss": 0.9833,
"step": 72000
},
{
"epoch": 1.71,
"learning_rate": 0.0002857217482695225,
"loss": 0.9901,
"step": 74000
},
{
"epoch": 1.76,
"learning_rate": 0.0002852539462994525,
"loss": 0.9905,
"step": 76000
},
{
"epoch": 1.81,
"learning_rate": 0.00028478614432938246,
"loss": 0.9845,
"step": 78000
},
{
"epoch": 1.85,
"learning_rate": 0.00028431834235931243,
"loss": 0.9886,
"step": 80000
},
{
"epoch": 1.9,
"learning_rate": 0.0002838505403892424,
"loss": 0.9848,
"step": 82000
},
{
"epoch": 1.95,
"learning_rate": 0.00028338273841917237,
"loss": 0.9827,
"step": 84000
},
{
"epoch": 1.99,
"learning_rate": 0.00028291493644910234,
"loss": 0.9764,
"step": 86000
},
{
"epoch": 2.04,
"learning_rate": 0.0002824471344790323,
"loss": 0.9546,
"step": 88000
},
{
"epoch": 2.08,
"learning_rate": 0.0002819793325089623,
"loss": 0.9388,
"step": 90000
},
{
"epoch": 2.13,
"learning_rate": 0.00028151153053889225,
"loss": 0.9432,
"step": 92000
},
{
"epoch": 2.18,
"learning_rate": 0.0002810437285688222,
"loss": 0.9591,
"step": 94000
},
{
"epoch": 2.22,
"learning_rate": 0.0002805759265987522,
"loss": 0.9431,
"step": 96000
},
{
"epoch": 2.27,
"learning_rate": 0.00028010812462868215,
"loss": 0.9388,
"step": 98000
},
{
"epoch": 2.32,
"learning_rate": 0.0002796403226586121,
"loss": 0.9421,
"step": 100000
},
{
"epoch": 2.36,
"learning_rate": 0.0002791725206885421,
"loss": 0.9452,
"step": 102000
},
{
"epoch": 2.41,
"learning_rate": 0.0002787047187184721,
"loss": 0.9535,
"step": 104000
},
{
"epoch": 2.45,
"learning_rate": 0.00027823691674840203,
"loss": 0.9373,
"step": 106000
},
{
"epoch": 2.5,
"learning_rate": 0.00027776911477833205,
"loss": 0.9345,
"step": 108000
},
{
"epoch": 2.55,
"learning_rate": 0.00027730131280826197,
"loss": 0.9478,
"step": 110000
},
{
"epoch": 2.59,
"learning_rate": 0.000276833510838192,
"loss": 0.9445,
"step": 112000
},
{
"epoch": 2.64,
"learning_rate": 0.0002763657088681219,
"loss": 0.9297,
"step": 114000
},
{
"epoch": 2.69,
"learning_rate": 0.0002758979068980519,
"loss": 0.9294,
"step": 116000
},
{
"epoch": 2.73,
"learning_rate": 0.00027543010492798184,
"loss": 0.9273,
"step": 118000
},
{
"epoch": 2.78,
"learning_rate": 0.00027496230295791186,
"loss": 0.9242,
"step": 120000
},
{
"epoch": 2.83,
"learning_rate": 0.0002744945009878418,
"loss": 0.9228,
"step": 122000
},
{
"epoch": 2.87,
"learning_rate": 0.0002740266990177718,
"loss": 0.9316,
"step": 124000
},
{
"epoch": 2.92,
"learning_rate": 0.0002735588970477017,
"loss": 0.9448,
"step": 126000
},
{
"epoch": 2.96,
"learning_rate": 0.00027309109507763174,
"loss": 0.9253,
"step": 128000
},
{
"epoch": 3.01,
"learning_rate": 0.00027262329310756165,
"loss": 0.9197,
"step": 130000
},
{
"epoch": 3.06,
"learning_rate": 0.0002721554911374917,
"loss": 0.9025,
"step": 132000
},
{
"epoch": 3.1,
"learning_rate": 0.0002716876891674216,
"loss": 0.8983,
"step": 134000
},
{
"epoch": 3.15,
"learning_rate": 0.0002712198871973516,
"loss": 0.9013,
"step": 136000
},
{
"epoch": 3.2,
"learning_rate": 0.0002707520852272816,
"loss": 0.9019,
"step": 138000
},
{
"epoch": 3.24,
"learning_rate": 0.00027028428325721155,
"loss": 0.9016,
"step": 140000
},
{
"epoch": 3.29,
"learning_rate": 0.0002698164812871415,
"loss": 0.8979,
"step": 142000
},
{
"epoch": 3.33,
"learning_rate": 0.0002693486793170715,
"loss": 0.8961,
"step": 144000
},
{
"epoch": 3.38,
"learning_rate": 0.00026888087734700146,
"loss": 0.9007,
"step": 146000
},
{
"epoch": 3.43,
"learning_rate": 0.00026841307537693143,
"loss": 0.8951,
"step": 148000
},
{
"epoch": 3.47,
"learning_rate": 0.0002679452734068614,
"loss": 0.8926,
"step": 150000
},
{
"epoch": 3.52,
"learning_rate": 0.00026747747143679137,
"loss": 0.8924,
"step": 152000
},
{
"epoch": 3.57,
"learning_rate": 0.00026700966946672134,
"loss": 0.9044,
"step": 154000
},
{
"epoch": 3.61,
"learning_rate": 0.0002665418674966513,
"loss": 0.8952,
"step": 156000
},
{
"epoch": 3.66,
"learning_rate": 0.00026607406552658127,
"loss": 0.9001,
"step": 158000
},
{
"epoch": 3.7,
"learning_rate": 0.00026560626355651124,
"loss": 0.8898,
"step": 160000
},
{
"epoch": 3.75,
"learning_rate": 0.0002651384615864412,
"loss": 0.895,
"step": 162000
},
{
"epoch": 3.8,
"learning_rate": 0.0002646706596163712,
"loss": 0.9015,
"step": 164000
},
{
"epoch": 3.84,
"learning_rate": 0.00026420285764630115,
"loss": 0.892,
"step": 166000
},
{
"epoch": 3.89,
"learning_rate": 0.0002637350556762311,
"loss": 0.8903,
"step": 168000
},
{
"epoch": 3.94,
"learning_rate": 0.0002632672537061611,
"loss": 0.8916,
"step": 170000
},
{
"epoch": 3.98,
"learning_rate": 0.00026279945173609105,
"loss": 0.8941,
"step": 172000
},
{
"epoch": 4.03,
"learning_rate": 0.000262331649766021,
"loss": 0.8771,
"step": 174000
},
{
"epoch": 4.08,
"learning_rate": 0.000261863847795951,
"loss": 0.8716,
"step": 176000
},
{
"epoch": 4.12,
"learning_rate": 0.00026139604582588096,
"loss": 0.8632,
"step": 178000
},
{
"epoch": 4.17,
"learning_rate": 0.00026092824385581093,
"loss": 0.8573,
"step": 180000
},
{
"epoch": 4.21,
"learning_rate": 0.0002604604418857409,
"loss": 0.8642,
"step": 182000
},
{
"epoch": 4.26,
"learning_rate": 0.00025999263991567087,
"loss": 0.8642,
"step": 184000
},
{
"epoch": 4.31,
"learning_rate": 0.00025952483794560084,
"loss": 0.8617,
"step": 186000
},
{
"epoch": 4.35,
"learning_rate": 0.0002590570359755308,
"loss": 0.8574,
"step": 188000
},
{
"epoch": 4.4,
"learning_rate": 0.0002585892340054608,
"loss": 0.8612,
"step": 190000
},
{
"epoch": 4.45,
"learning_rate": 0.00025812143203539074,
"loss": 0.8706,
"step": 192000
},
{
"epoch": 4.49,
"learning_rate": 0.0002576536300653207,
"loss": 0.8605,
"step": 194000
},
{
"epoch": 4.54,
"learning_rate": 0.0002571858280952507,
"loss": 0.8703,
"step": 196000
},
{
"epoch": 4.58,
"learning_rate": 0.00025671802612518065,
"loss": 0.8691,
"step": 198000
},
{
"epoch": 4.63,
"learning_rate": 0.0002562502241551106,
"loss": 0.873,
"step": 200000
},
{
"epoch": 4.68,
"learning_rate": 0.0002557824221850406,
"loss": 0.857,
"step": 202000
},
{
"epoch": 4.72,
"learning_rate": 0.00025531462021497056,
"loss": 0.8686,
"step": 204000
},
{
"epoch": 4.77,
"learning_rate": 0.0002548468182449005,
"loss": 0.868,
"step": 206000
},
{
"epoch": 4.82,
"learning_rate": 0.0002543790162748305,
"loss": 0.8756,
"step": 208000
},
{
"epoch": 4.86,
"learning_rate": 0.0002539112143047605,
"loss": 0.8656,
"step": 210000
},
{
"epoch": 4.91,
"learning_rate": 0.00025344341233469043,
"loss": 0.8725,
"step": 212000
},
{
"epoch": 4.96,
"learning_rate": 0.00025297561036462045,
"loss": 0.8675,
"step": 214000
},
{
"epoch": 5.0,
"learning_rate": 0.00025250780839455037,
"loss": 0.8644,
"step": 216000
},
{
"epoch": 5.05,
"learning_rate": 0.0002520400064244804,
"loss": 0.8397,
"step": 218000
},
{
"epoch": 5.09,
"learning_rate": 0.0002515722044544103,
"loss": 0.8374,
"step": 220000
},
{
"epoch": 5.14,
"learning_rate": 0.00025110440248434033,
"loss": 0.8364,
"step": 222000
},
{
"epoch": 5.19,
"learning_rate": 0.00025063660051427025,
"loss": 0.8454,
"step": 224000
},
{
"epoch": 5.23,
"learning_rate": 0.00025016879854420027,
"loss": 0.8367,
"step": 226000
},
{
"epoch": 5.28,
"learning_rate": 0.0002497009965741302,
"loss": 0.8275,
"step": 228000
},
{
"epoch": 5.33,
"learning_rate": 0.0002492331946040602,
"loss": 0.8398,
"step": 230000
},
{
"epoch": 5.37,
"learning_rate": 0.0002487653926339901,
"loss": 0.8403,
"step": 232000
},
{
"epoch": 5.42,
"learning_rate": 0.00024829759066392014,
"loss": 0.8409,
"step": 234000
},
{
"epoch": 5.46,
"learning_rate": 0.00024782978869385006,
"loss": 0.8366,
"step": 236000
},
{
"epoch": 5.51,
"learning_rate": 0.0002473619867237801,
"loss": 0.8381,
"step": 238000
},
{
"epoch": 5.56,
"learning_rate": 0.00024689418475371,
"loss": 0.842,
"step": 240000
},
{
"epoch": 5.6,
"learning_rate": 0.00024642638278364,
"loss": 0.843,
"step": 242000
},
{
"epoch": 5.65,
"learning_rate": 0.00024595858081357,
"loss": 0.8489,
"step": 244000
},
{
"epoch": 5.7,
"learning_rate": 0.00024549077884349996,
"loss": 0.8313,
"step": 246000
},
{
"epoch": 5.74,
"learning_rate": 0.0002450229768734299,
"loss": 0.8468,
"step": 248000
},
{
"epoch": 5.79,
"learning_rate": 0.0002445551749033599,
"loss": 0.8446,
"step": 250000
},
{
"epoch": 5.84,
"learning_rate": 0.00024408737293328986,
"loss": 0.8329,
"step": 252000
},
{
"epoch": 5.88,
"learning_rate": 0.00024361957096321983,
"loss": 0.8402,
"step": 254000
},
{
"epoch": 5.93,
"learning_rate": 0.0002431517689931498,
"loss": 0.8502,
"step": 256000
},
{
"epoch": 5.97,
"learning_rate": 0.00024268396702307977,
"loss": 0.8437,
"step": 258000
},
{
"epoch": 6.02,
"learning_rate": 0.00024221616505300974,
"loss": 0.8341,
"step": 260000
},
{
"epoch": 6.07,
"learning_rate": 0.0002417483630829397,
"loss": 0.8152,
"step": 262000
},
{
"epoch": 6.11,
"learning_rate": 0.00024128056111286968,
"loss": 0.8189,
"step": 264000
},
{
"epoch": 6.16,
"learning_rate": 0.00024081275914279965,
"loss": 0.8193,
"step": 266000
},
{
"epoch": 6.21,
"learning_rate": 0.00024034495717272961,
"loss": 0.825,
"step": 268000
},
{
"epoch": 6.25,
"learning_rate": 0.00023987715520265958,
"loss": 0.818,
"step": 270000
},
{
"epoch": 6.3,
"learning_rate": 0.00023940935323258955,
"loss": 0.8204,
"step": 272000
},
{
"epoch": 6.34,
"learning_rate": 0.00023894155126251952,
"loss": 0.823,
"step": 274000
},
{
"epoch": 6.39,
"learning_rate": 0.0002384737492924495,
"loss": 0.8179,
"step": 276000
},
{
"epoch": 6.44,
"learning_rate": 0.00023800594732237946,
"loss": 0.8152,
"step": 278000
},
{
"epoch": 6.48,
"learning_rate": 0.00023753814535230943,
"loss": 0.8178,
"step": 280000
},
{
"epoch": 6.53,
"learning_rate": 0.00023707034338223942,
"loss": 0.8212,
"step": 282000
},
{
"epoch": 6.58,
"learning_rate": 0.00023660254141216937,
"loss": 0.8161,
"step": 284000
},
{
"epoch": 6.62,
"learning_rate": 0.00023613473944209936,
"loss": 0.8162,
"step": 286000
},
{
"epoch": 6.67,
"learning_rate": 0.0002356669374720293,
"loss": 0.828,
"step": 288000
},
{
"epoch": 6.72,
"learning_rate": 0.0002351991355019593,
"loss": 0.823,
"step": 290000
},
{
"epoch": 6.76,
"learning_rate": 0.00023473133353188927,
"loss": 0.8156,
"step": 292000
},
{
"epoch": 6.81,
"learning_rate": 0.00023426353156181924,
"loss": 0.8176,
"step": 294000
},
{
"epoch": 6.85,
"learning_rate": 0.0002337957295917492,
"loss": 0.8226,
"step": 296000
},
{
"epoch": 6.9,
"learning_rate": 0.00023332792762167917,
"loss": 0.8189,
"step": 298000
},
{
"epoch": 6.95,
"learning_rate": 0.00023286012565160914,
"loss": 0.8082,
"step": 300000
},
{
"epoch": 6.99,
"learning_rate": 0.0002323923236815391,
"loss": 0.8257,
"step": 302000
},
{
"epoch": 7.04,
"learning_rate": 0.00023192452171146908,
"loss": 0.7925,
"step": 304000
},
{
"epoch": 7.09,
"learning_rate": 0.00023145671974139905,
"loss": 0.7902,
"step": 306000
},
{
"epoch": 7.13,
"learning_rate": 0.00023098891777132902,
"loss": 0.7994,
"step": 308000
},
{
"epoch": 7.18,
"learning_rate": 0.000230521115801259,
"loss": 0.8029,
"step": 310000
},
{
"epoch": 7.22,
"learning_rate": 0.00023005331383118896,
"loss": 0.7983,
"step": 312000
},
{
"epoch": 7.27,
"learning_rate": 0.00022958551186111893,
"loss": 0.797,
"step": 314000
},
{
"epoch": 7.32,
"learning_rate": 0.00022911770989104892,
"loss": 0.8009,
"step": 316000
},
{
"epoch": 7.36,
"learning_rate": 0.00022864990792097886,
"loss": 0.7979,
"step": 318000
},
{
"epoch": 7.41,
"learning_rate": 0.00022818210595090886,
"loss": 0.8055,
"step": 320000
},
{
"epoch": 7.46,
"learning_rate": 0.0002277143039808388,
"loss": 0.7971,
"step": 322000
},
{
"epoch": 7.5,
"learning_rate": 0.0002272465020107688,
"loss": 0.8066,
"step": 324000
},
{
"epoch": 7.55,
"learning_rate": 0.00022677870004069874,
"loss": 0.7975,
"step": 326000
},
{
"epoch": 7.6,
"learning_rate": 0.00022631089807062873,
"loss": 0.7912,
"step": 328000
},
{
"epoch": 7.64,
"learning_rate": 0.00022584309610055868,
"loss": 0.7988,
"step": 330000
},
{
"epoch": 7.69,
"learning_rate": 0.00022537529413048867,
"loss": 0.7999,
"step": 332000
},
{
"epoch": 7.73,
"learning_rate": 0.00022490749216041861,
"loss": 0.8019,
"step": 334000
},
{
"epoch": 7.78,
"learning_rate": 0.0002244396901903486,
"loss": 0.8108,
"step": 336000
},
{
"epoch": 7.83,
"learning_rate": 0.00022397188822027855,
"loss": 0.8075,
"step": 338000
},
{
"epoch": 7.87,
"learning_rate": 0.00022350408625020855,
"loss": 0.7995,
"step": 340000
},
{
"epoch": 7.92,
"learning_rate": 0.0002230362842801385,
"loss": 0.802,
"step": 342000
},
{
"epoch": 7.97,
"learning_rate": 0.00022256848231006848,
"loss": 0.7998,
"step": 344000
},
{
"epoch": 8.01,
"learning_rate": 0.00022210068033999843,
"loss": 0.8023,
"step": 346000
},
{
"epoch": 8.06,
"learning_rate": 0.00022163287836992842,
"loss": 0.7748,
"step": 348000
},
{
"epoch": 8.1,
"learning_rate": 0.00022116507639985836,
"loss": 0.7839,
"step": 350000
},
{
"epoch": 8.15,
"learning_rate": 0.00022069727442978836,
"loss": 0.7806,
"step": 352000
},
{
"epoch": 8.2,
"learning_rate": 0.00022022947245971836,
"loss": 0.7775,
"step": 354000
},
{
"epoch": 8.24,
"learning_rate": 0.0002197616704896483,
"loss": 0.7734,
"step": 356000
},
{
"epoch": 8.29,
"learning_rate": 0.0002192938685195783,
"loss": 0.7728,
"step": 358000
},
{
"epoch": 8.34,
"learning_rate": 0.00021882606654950824,
"loss": 0.7879,
"step": 360000
},
{
"epoch": 8.38,
"learning_rate": 0.00021835826457943823,
"loss": 0.7891,
"step": 362000
},
{
"epoch": 8.43,
"learning_rate": 0.00021789046260936817,
"loss": 0.7922,
"step": 364000
},
{
"epoch": 8.48,
"learning_rate": 0.00021742266063929817,
"loss": 0.7837,
"step": 366000
},
{
"epoch": 8.52,
"learning_rate": 0.0002169548586692281,
"loss": 0.7838,
"step": 368000
},
{
"epoch": 8.57,
"learning_rate": 0.0002164870566991581,
"loss": 0.7797,
"step": 370000
},
{
"epoch": 8.61,
"learning_rate": 0.00021601925472908805,
"loss": 0.7818,
"step": 372000
},
{
"epoch": 8.66,
"learning_rate": 0.00021555145275901804,
"loss": 0.7838,
"step": 374000
},
{
"epoch": 8.71,
"learning_rate": 0.000215083650788948,
"loss": 0.7828,
"step": 376000
},
{
"epoch": 8.75,
"learning_rate": 0.00021461584881887798,
"loss": 0.7822,
"step": 378000
},
{
"epoch": 8.8,
"learning_rate": 0.00021414804684880792,
"loss": 0.7952,
"step": 380000
},
{
"epoch": 8.85,
"learning_rate": 0.00021368024487873792,
"loss": 0.7888,
"step": 382000
},
{
"epoch": 8.89,
"learning_rate": 0.00021321244290866786,
"loss": 0.7813,
"step": 384000
},
{
"epoch": 8.94,
"learning_rate": 0.00021274464093859786,
"loss": 0.7784,
"step": 386000
},
{
"epoch": 8.98,
"learning_rate": 0.00021227683896852783,
"loss": 0.776,
"step": 388000
},
{
"epoch": 9.03,
"learning_rate": 0.0002118090369984578,
"loss": 0.7686,
"step": 390000
},
{
"epoch": 9.08,
"learning_rate": 0.0002113412350283878,
"loss": 0.7576,
"step": 392000
},
{
"epoch": 9.12,
"learning_rate": 0.00021087343305831773,
"loss": 0.7611,
"step": 394000
},
{
"epoch": 9.17,
"learning_rate": 0.00021040563108824773,
"loss": 0.7698,
"step": 396000
},
{
"epoch": 9.22,
"learning_rate": 0.00020993782911817767,
"loss": 0.7632,
"step": 398000
},
{
"epoch": 9.26,
"learning_rate": 0.00020947002714810767,
"loss": 0.7725,
"step": 400000
},
{
"epoch": 9.31,
"learning_rate": 0.0002090022251780376,
"loss": 0.7706,
"step": 402000
},
{
"epoch": 9.35,
"learning_rate": 0.0002085344232079676,
"loss": 0.7709,
"step": 404000
},
{
"epoch": 9.4,
"learning_rate": 0.00020806662123789755,
"loss": 0.7651,
"step": 406000
},
{
"epoch": 9.45,
"learning_rate": 0.00020759881926782754,
"loss": 0.7657,
"step": 408000
},
{
"epoch": 9.49,
"learning_rate": 0.00020713101729775748,
"loss": 0.7589,
"step": 410000
},
{
"epoch": 9.54,
"learning_rate": 0.00020666321532768748,
"loss": 0.7683,
"step": 412000
},
{
"epoch": 9.59,
"learning_rate": 0.00020619541335761742,
"loss": 0.7684,
"step": 414000
},
{
"epoch": 9.63,
"learning_rate": 0.00020572761138754742,
"loss": 0.7756,
"step": 416000
},
{
"epoch": 9.68,
"learning_rate": 0.00020525980941747736,
"loss": 0.7653,
"step": 418000
},
{
"epoch": 9.73,
"learning_rate": 0.00020479200744740736,
"loss": 0.7718,
"step": 420000
},
{
"epoch": 9.77,
"learning_rate": 0.0002043242054773373,
"loss": 0.7676,
"step": 422000
},
{
"epoch": 9.82,
"learning_rate": 0.0002038564035072673,
"loss": 0.772,
"step": 424000
},
{
"epoch": 9.86,
"learning_rate": 0.00020338860153719726,
"loss": 0.766,
"step": 426000
},
{
"epoch": 9.91,
"learning_rate": 0.00020292079956712723,
"loss": 0.7739,
"step": 428000
},
{
"epoch": 9.96,
"learning_rate": 0.0002024529975970572,
"loss": 0.7743,
"step": 430000
},
{
"epoch": 10.0,
"learning_rate": 0.00020198519562698717,
"loss": 0.7719,
"step": 432000
},
{
"epoch": 10.05,
"learning_rate": 0.00020151739365691714,
"loss": 0.7443,
"step": 434000
},
{
"epoch": 10.1,
"learning_rate": 0.0002010495916868471,
"loss": 0.7573,
"step": 436000
},
{
"epoch": 10.14,
"learning_rate": 0.00020058178971677708,
"loss": 0.7546,
"step": 438000
},
{
"epoch": 10.19,
"learning_rate": 0.00020011398774670704,
"loss": 0.7516,
"step": 440000
},
{
"epoch": 10.23,
"learning_rate": 0.000199646185776637,
"loss": 0.7444,
"step": 442000
},
{
"epoch": 10.28,
"learning_rate": 0.00019917838380656698,
"loss": 0.7656,
"step": 444000
},
{
"epoch": 10.33,
"learning_rate": 0.00019871058183649695,
"loss": 0.7452,
"step": 446000
},
{
"epoch": 10.37,
"learning_rate": 0.00019824277986642692,
"loss": 0.7555,
"step": 448000
},
{
"epoch": 10.42,
"learning_rate": 0.0001977749778963569,
"loss": 0.7486,
"step": 450000
},
{
"epoch": 10.47,
"learning_rate": 0.00019730717592628686,
"loss": 0.7509,
"step": 452000
},
{
"epoch": 10.51,
"learning_rate": 0.00019683937395621683,
"loss": 0.7484,
"step": 454000
},
{
"epoch": 10.56,
"learning_rate": 0.0001963715719861468,
"loss": 0.7554,
"step": 456000
},
{
"epoch": 10.61,
"learning_rate": 0.00019590377001607676,
"loss": 0.7557,
"step": 458000
},
{
"epoch": 10.65,
"learning_rate": 0.00019543596804600676,
"loss": 0.7603,
"step": 460000
},
{
"epoch": 10.7,
"learning_rate": 0.00019496816607593673,
"loss": 0.7577,
"step": 462000
},
{
"epoch": 10.74,
"learning_rate": 0.0001945003641058667,
"loss": 0.7641,
"step": 464000
},
{
"epoch": 10.79,
"learning_rate": 0.00019403256213579667,
"loss": 0.7648,
"step": 466000
},
{
"epoch": 10.84,
"learning_rate": 0.00019356476016572664,
"loss": 0.755,
"step": 468000
},
{
"epoch": 10.88,
"learning_rate": 0.0001930969581956566,
"loss": 0.7445,
"step": 470000
},
{
"epoch": 10.93,
"learning_rate": 0.00019262915622558657,
"loss": 0.7614,
"step": 472000
},
{
"epoch": 10.98,
"learning_rate": 0.00019216135425551654,
"loss": 0.7526,
"step": 474000
},
{
"epoch": 11.02,
"learning_rate": 0.0001916935522854465,
"loss": 0.7493,
"step": 476000
},
{
"epoch": 11.07,
"learning_rate": 0.00019122575031537648,
"loss": 0.7299,
"step": 478000
},
{
"epoch": 11.11,
"learning_rate": 0.00019075794834530645,
"loss": 0.7379,
"step": 480000
},
{
"epoch": 11.16,
"learning_rate": 0.00019029014637523642,
"loss": 0.7365,
"step": 482000
},
{
"epoch": 11.21,
"learning_rate": 0.00018982234440516639,
"loss": 0.7402,
"step": 484000
},
{
"epoch": 11.25,
"learning_rate": 0.00018935454243509636,
"loss": 0.7409,
"step": 486000
},
{
"epoch": 11.3,
"learning_rate": 0.00018888674046502632,
"loss": 0.7294,
"step": 488000
},
{
"epoch": 11.35,
"learning_rate": 0.0001884189384949563,
"loss": 0.7467,
"step": 490000
},
{
"epoch": 11.39,
"learning_rate": 0.00018795113652488626,
"loss": 0.7357,
"step": 492000
},
{
"epoch": 11.44,
"learning_rate": 0.00018748333455481623,
"loss": 0.744,
"step": 494000
},
{
"epoch": 11.49,
"learning_rate": 0.0001870155325847462,
"loss": 0.741,
"step": 496000
},
{
"epoch": 11.53,
"learning_rate": 0.0001865477306146762,
"loss": 0.7404,
"step": 498000
},
{
"epoch": 11.58,
"learning_rate": 0.00018607992864460614,
"loss": 0.749,
"step": 500000
},
{
"epoch": 11.62,
"learning_rate": 0.00018561212667453613,
"loss": 0.7388,
"step": 502000
},
{
"epoch": 11.67,
"learning_rate": 0.00018514432470446607,
"loss": 0.742,
"step": 504000
},
{
"epoch": 11.72,
"learning_rate": 0.00018467652273439607,
"loss": 0.7481,
"step": 506000
},
{
"epoch": 11.76,
"learning_rate": 0.000184208720764326,
"loss": 0.7553,
"step": 508000
},
{
"epoch": 11.81,
"learning_rate": 0.000183740918794256,
"loss": 0.7457,
"step": 510000
},
{
"epoch": 11.86,
"learning_rate": 0.00018327311682418595,
"loss": 0.7447,
"step": 512000
},
{
"epoch": 11.9,
"learning_rate": 0.00018280531485411595,
"loss": 0.752,
"step": 514000
},
{
"epoch": 11.95,
"learning_rate": 0.0001823375128840459,
"loss": 0.7419,
"step": 516000
},
{
"epoch": 11.99,
"learning_rate": 0.00018186971091397588,
"loss": 0.7412,
"step": 518000
},
{
"epoch": 12.04,
"learning_rate": 0.00018140190894390583,
"loss": 0.7262,
"step": 520000
},
{
"epoch": 12.09,
"learning_rate": 0.00018093410697383582,
"loss": 0.7299,
"step": 522000
},
{
"epoch": 12.13,
"learning_rate": 0.00018046630500376576,
"loss": 0.7279,
"step": 524000
},
{
"epoch": 12.18,
"learning_rate": 0.00017999850303369576,
"loss": 0.7308,
"step": 526000
},
{
"epoch": 12.23,
"learning_rate": 0.0001795307010636257,
"loss": 0.7305,
"step": 528000
},
{
"epoch": 12.27,
"learning_rate": 0.0001790628990935557,
"loss": 0.7348,
"step": 530000
},
{
"epoch": 12.32,
"learning_rate": 0.0001785950971234857,
"loss": 0.7312,
"step": 532000
},
{
"epoch": 12.37,
"learning_rate": 0.00017812729515341563,
"loss": 0.7275,
"step": 534000
},
{
"epoch": 12.41,
"learning_rate": 0.00017765949318334563,
"loss": 0.7291,
"step": 536000
},
{
"epoch": 12.46,
"learning_rate": 0.00017719169121327557,
"loss": 0.7265,
"step": 538000
},
{
"epoch": 12.5,
"learning_rate": 0.00017672388924320557,
"loss": 0.7224,
"step": 540000
},
{
"epoch": 12.55,
"learning_rate": 0.0001762560872731355,
"loss": 0.7232,
"step": 542000
},
{
"epoch": 12.6,
"learning_rate": 0.0001757882853030655,
"loss": 0.7272,
"step": 544000
},
{
"epoch": 12.64,
"learning_rate": 0.00017532048333299545,
"loss": 0.7305,
"step": 546000
},
{
"epoch": 12.69,
"learning_rate": 0.00017485268136292544,
"loss": 0.7375,
"step": 548000
},
{
"epoch": 12.74,
"learning_rate": 0.00017438487939285539,
"loss": 0.7377,
"step": 550000
},
{
"epoch": 12.78,
"learning_rate": 0.00017391707742278538,
"loss": 0.7278,
"step": 552000
},
{
"epoch": 12.83,
"learning_rate": 0.00017344927545271532,
"loss": 0.7369,
"step": 554000
},
{
"epoch": 12.87,
"learning_rate": 0.00017298147348264532,
"loss": 0.7366,
"step": 556000
},
{
"epoch": 12.92,
"learning_rate": 0.00017251367151257526,
"loss": 0.736,
"step": 558000
},
{
"epoch": 12.97,
"learning_rate": 0.00017204586954250526,
"loss": 0.737,
"step": 560000
},
{
"epoch": 13.01,
"learning_rate": 0.0001715780675724352,
"loss": 0.7301,
"step": 562000
},
{
"epoch": 13.06,
"learning_rate": 0.0001711102656023652,
"loss": 0.7003,
"step": 564000
},
{
"epoch": 13.11,
"learning_rate": 0.00017064246363229514,
"loss": 0.7132,
"step": 566000
},
{
"epoch": 13.15,
"learning_rate": 0.00017017466166222513,
"loss": 0.7178,
"step": 568000
},
{
"epoch": 13.2,
"learning_rate": 0.00016970685969215513,
"loss": 0.7187,
"step": 570000
},
{
"epoch": 13.25,
"learning_rate": 0.00016923905772208507,
"loss": 0.7239,
"step": 572000
},
{
"epoch": 13.29,
"learning_rate": 0.00016877125575201507,
"loss": 0.7271,
"step": 574000
},
{
"epoch": 13.34,
"learning_rate": 0.000168303453781945,
"loss": 0.7208,
"step": 576000
},
{
"epoch": 13.38,
"learning_rate": 0.000167835651811875,
"loss": 0.7199,
"step": 578000
},
{
"epoch": 13.43,
"learning_rate": 0.00016736784984180495,
"loss": 0.7094,
"step": 580000
},
{
"epoch": 13.48,
"learning_rate": 0.00016690004787173494,
"loss": 0.7114,
"step": 582000
},
{
"epoch": 13.52,
"learning_rate": 0.00016643224590166488,
"loss": 0.7196,
"step": 584000
},
{
"epoch": 13.57,
"learning_rate": 0.00016596444393159488,
"loss": 0.7222,
"step": 586000
},
{
"epoch": 13.62,
"learning_rate": 0.00016549664196152482,
"loss": 0.7345,
"step": 588000
},
{
"epoch": 13.66,
"learning_rate": 0.00016502883999145482,
"loss": 0.7208,
"step": 590000
},
{
"epoch": 13.71,
"learning_rate": 0.00016456103802138476,
"loss": 0.7298,
"step": 592000
},
{
"epoch": 13.75,
"learning_rate": 0.00016409323605131475,
"loss": 0.7324,
"step": 594000
},
{
"epoch": 13.8,
"learning_rate": 0.0001636254340812447,
"loss": 0.7243,
"step": 596000
},
{
"epoch": 13.85,
"learning_rate": 0.0001631576321111747,
"loss": 0.7215,
"step": 598000
},
{
"epoch": 13.89,
"learning_rate": 0.00016268983014110463,
"loss": 0.7246,
"step": 600000
},
{
"epoch": 13.94,
"learning_rate": 0.00016222202817103463,
"loss": 0.7219,
"step": 602000
},
{
"epoch": 13.99,
"learning_rate": 0.0001617542262009646,
"loss": 0.7248,
"step": 604000
},
{
"epoch": 14.03,
"learning_rate": 0.00016128642423089457,
"loss": 0.7139,
"step": 606000
},
{
"epoch": 14.08,
"learning_rate": 0.00016081862226082454,
"loss": 0.7026,
"step": 608000
},
{
"epoch": 14.13,
"learning_rate": 0.0001603508202907545,
"loss": 0.7107,
"step": 610000
},
{
"epoch": 14.17,
"learning_rate": 0.00015988301832068447,
"loss": 0.7037,
"step": 612000
},
{
"epoch": 14.22,
"learning_rate": 0.00015941521635061444,
"loss": 0.7145,
"step": 614000
},
{
"epoch": 14.26,
"learning_rate": 0.0001589474143805444,
"loss": 0.7181,
"step": 616000
},
{
"epoch": 14.31,
"learning_rate": 0.00015847961241047438,
"loss": 0.7026,
"step": 618000
},
{
"epoch": 14.36,
"learning_rate": 0.00015801181044040435,
"loss": 0.7142,
"step": 620000
},
{
"epoch": 14.4,
"learning_rate": 0.00015754400847033432,
"loss": 0.7087,
"step": 622000
},
{
"epoch": 14.45,
"learning_rate": 0.0001570762065002643,
"loss": 0.7109,
"step": 624000
},
{
"epoch": 14.5,
"learning_rate": 0.00015660840453019426,
"loss": 0.7031,
"step": 626000
},
{
"epoch": 14.54,
"learning_rate": 0.00015614060256012425,
"loss": 0.7101,
"step": 628000
},
{
"epoch": 14.59,
"learning_rate": 0.0001556728005900542,
"loss": 0.7152,
"step": 630000
},
{
"epoch": 14.63,
"learning_rate": 0.0001552049986199842,
"loss": 0.7147,
"step": 632000
},
{
"epoch": 14.68,
"learning_rate": 0.00015473719664991413,
"loss": 0.7144,
"step": 634000
},
{
"epoch": 14.73,
"learning_rate": 0.00015426939467984413,
"loss": 0.7113,
"step": 636000
},
{
"epoch": 14.77,
"learning_rate": 0.00015380159270977407,
"loss": 0.7071,
"step": 638000
},
{
"epoch": 14.82,
"learning_rate": 0.00015333379073970407,
"loss": 0.7118,
"step": 640000
},
{
"epoch": 14.87,
"learning_rate": 0.00015286598876963403,
"loss": 0.7098,
"step": 642000
},
{
"epoch": 14.91,
"learning_rate": 0.000152398186799564,
"loss": 0.706,
"step": 644000
},
{
"epoch": 14.96,
"learning_rate": 0.00015193038482949397,
"loss": 0.709,
"step": 646000
},
{
"epoch": 15.01,
"learning_rate": 0.00015146258285942394,
"loss": 0.7087,
"step": 648000
},
{
"epoch": 15.05,
"learning_rate": 0.0001509947808893539,
"loss": 0.6983,
"step": 650000
},
{
"epoch": 15.1,
"learning_rate": 0.00015052697891928388,
"loss": 0.693,
"step": 652000
},
{
"epoch": 15.14,
"learning_rate": 0.00015005917694921385,
"loss": 0.6953,
"step": 654000
},
{
"epoch": 15.19,
"learning_rate": 0.00014959137497914382,
"loss": 0.6994,
"step": 656000
},
{
"epoch": 15.24,
"learning_rate": 0.00014912357300907379,
"loss": 0.6975,
"step": 658000
},
{
"epoch": 15.28,
"learning_rate": 0.00014865577103900375,
"loss": 0.7047,
"step": 660000
},
{
"epoch": 15.33,
"learning_rate": 0.00014818796906893372,
"loss": 0.6975,
"step": 662000
},
{
"epoch": 15.38,
"learning_rate": 0.0001477201670988637,
"loss": 0.704,
"step": 664000
},
{
"epoch": 15.42,
"learning_rate": 0.00014725236512879366,
"loss": 0.7042,
"step": 666000
},
{
"epoch": 15.47,
"learning_rate": 0.00014678456315872363,
"loss": 0.6917,
"step": 668000
},
{
"epoch": 15.51,
"learning_rate": 0.0001463167611886536,
"loss": 0.6914,
"step": 670000
},
{
"epoch": 15.56,
"learning_rate": 0.00014584895921858357,
"loss": 0.7018,
"step": 672000
},
{
"epoch": 15.61,
"learning_rate": 0.00014538115724851354,
"loss": 0.7016,
"step": 674000
},
{
"epoch": 15.65,
"learning_rate": 0.0001449133552784435,
"loss": 0.7078,
"step": 676000
},
{
"epoch": 15.7,
"learning_rate": 0.00014444555330837347,
"loss": 0.6932,
"step": 678000
},
{
"epoch": 15.75,
"learning_rate": 0.00014397775133830344,
"loss": 0.6964,
"step": 680000
},
{
"epoch": 15.79,
"learning_rate": 0.0001435099493682334,
"loss": 0.6997,
"step": 682000
},
{
"epoch": 15.84,
"learning_rate": 0.00014304214739816338,
"loss": 0.7065,
"step": 684000
},
{
"epoch": 15.88,
"learning_rate": 0.00014257434542809335,
"loss": 0.7047,
"step": 686000
},
{
"epoch": 15.93,
"learning_rate": 0.00014210654345802332,
"loss": 0.7154,
"step": 688000
},
{
"epoch": 15.98,
"learning_rate": 0.0001416387414879533,
"loss": 0.6993,
"step": 690000
},
{
"epoch": 16.02,
"learning_rate": 0.00014117093951788326,
"loss": 0.6969,
"step": 692000
},
{
"epoch": 16.07,
"learning_rate": 0.00014070313754781325,
"loss": 0.689,
"step": 694000
},
{
"epoch": 16.12,
"learning_rate": 0.00014023533557774322,
"loss": 0.6888,
"step": 696000
},
{
"epoch": 16.16,
"learning_rate": 0.0001397675336076732,
"loss": 0.6818,
"step": 698000
},
{
"epoch": 16.21,
"learning_rate": 0.00013929973163760316,
"loss": 0.693,
"step": 700000
},
{
"epoch": 16.26,
"learning_rate": 0.00013883192966753313,
"loss": 0.6909,
"step": 702000
},
{
"epoch": 16.3,
"learning_rate": 0.0001383641276974631,
"loss": 0.6873,
"step": 704000
},
{
"epoch": 16.35,
"learning_rate": 0.00013789632572739307,
"loss": 0.6906,
"step": 706000
},
{
"epoch": 16.39,
"learning_rate": 0.00013742852375732303,
"loss": 0.6866,
"step": 708000
},
{
"epoch": 16.44,
"learning_rate": 0.000136960721787253,
"loss": 0.701,
"step": 710000
},
{
"epoch": 16.49,
"learning_rate": 0.00013649291981718297,
"loss": 0.6937,
"step": 712000
},
{
"epoch": 16.53,
"learning_rate": 0.00013602511784711294,
"loss": 0.6907,
"step": 714000
},
{
"epoch": 16.58,
"learning_rate": 0.0001355573158770429,
"loss": 0.6897,
"step": 716000
},
{
"epoch": 16.63,
"learning_rate": 0.00013508951390697288,
"loss": 0.6952,
"step": 718000
},
{
"epoch": 16.67,
"learning_rate": 0.00013462171193690285,
"loss": 0.6865,
"step": 720000
},
{
"epoch": 16.72,
"learning_rate": 0.00013415390996683282,
"loss": 0.6935,
"step": 722000
},
{
"epoch": 16.76,
"learning_rate": 0.00013368610799676278,
"loss": 0.6919,
"step": 724000
},
{
"epoch": 16.81,
"learning_rate": 0.00013321830602669275,
"loss": 0.6904,
"step": 726000
},
{
"epoch": 16.86,
"learning_rate": 0.00013275050405662272,
"loss": 0.6964,
"step": 728000
},
{
"epoch": 16.9,
"learning_rate": 0.00013228270208655272,
"loss": 0.6943,
"step": 730000
},
{
"epoch": 16.95,
"learning_rate": 0.0001318149001164827,
"loss": 0.6949,
"step": 732000
},
{
"epoch": 17.0,
"learning_rate": 0.00013134709814641266,
"loss": 0.6943,
"step": 734000
},
{
"epoch": 17.04,
"learning_rate": 0.00013087929617634263,
"loss": 0.6851,
"step": 736000
},
{
"epoch": 17.09,
"learning_rate": 0.0001304114942062726,
"loss": 0.6802,
"step": 738000
},
{
"epoch": 17.14,
"learning_rate": 0.00012994369223620256,
"loss": 0.6801,
"step": 740000
},
{
"epoch": 17.18,
"learning_rate": 0.00012947589026613253,
"loss": 0.6756,
"step": 742000
},
{
"epoch": 17.23,
"learning_rate": 0.0001290080882960625,
"loss": 0.6824,
"step": 744000
},
{
"epoch": 17.27,
"learning_rate": 0.00012854028632599247,
"loss": 0.6894,
"step": 746000
},
{
"epoch": 17.32,
"learning_rate": 0.00012807248435592244,
"loss": 0.682,
"step": 748000
},
{
"epoch": 17.37,
"learning_rate": 0.0001276046823858524,
"loss": 0.6814,
"step": 750000
},
{
"epoch": 17.41,
"learning_rate": 0.00012713688041578238,
"loss": 0.6737,
"step": 752000
},
{
"epoch": 17.46,
"learning_rate": 0.00012666907844571234,
"loss": 0.6874,
"step": 754000
},
{
"epoch": 17.51,
"learning_rate": 0.00012620127647564231,
"loss": 0.6842,
"step": 756000
},
{
"epoch": 17.55,
"learning_rate": 0.00012573347450557228,
"loss": 0.6871,
"step": 758000
},
{
"epoch": 17.6,
"learning_rate": 0.00012526567253550225,
"loss": 0.6833,
"step": 760000
},
{
"epoch": 17.64,
"learning_rate": 0.00012479787056543222,
"loss": 0.6818,
"step": 762000
},
{
"epoch": 17.69,
"learning_rate": 0.0001243300685953622,
"loss": 0.6824,
"step": 764000
},
{
"epoch": 17.74,
"learning_rate": 0.00012386226662529219,
"loss": 0.684,
"step": 766000
},
{
"epoch": 17.78,
"learning_rate": 0.00012339446465522215,
"loss": 0.6822,
"step": 768000
},
{
"epoch": 17.83,
"learning_rate": 0.00012292666268515212,
"loss": 0.68,
"step": 770000
},
{
"epoch": 17.88,
"learning_rate": 0.0001224588607150821,
"loss": 0.6842,
"step": 772000
},
{
"epoch": 17.92,
"learning_rate": 0.00012199105874501206,
"loss": 0.6827,
"step": 774000
},
{
"epoch": 17.97,
"learning_rate": 0.00012152325677494203,
"loss": 0.6901,
"step": 776000
},
{
"epoch": 18.02,
"learning_rate": 0.000121055454804872,
"loss": 0.6781,
"step": 778000
},
{
"epoch": 18.06,
"learning_rate": 0.00012058765283480197,
"loss": 0.6768,
"step": 780000
},
{
"epoch": 18.11,
"learning_rate": 0.00012011985086473194,
"loss": 0.6704,
"step": 782000
},
{
"epoch": 18.15,
"learning_rate": 0.0001196520488946619,
"loss": 0.6767,
"step": 784000
},
{
"epoch": 18.2,
"learning_rate": 0.00011918424692459187,
"loss": 0.6696,
"step": 786000
},
{
"epoch": 18.25,
"learning_rate": 0.00011871644495452184,
"loss": 0.6717,
"step": 788000
},
{
"epoch": 18.29,
"learning_rate": 0.00011824864298445181,
"loss": 0.6666,
"step": 790000
},
{
"epoch": 18.34,
"learning_rate": 0.00011778084101438178,
"loss": 0.6681,
"step": 792000
},
{
"epoch": 18.39,
"learning_rate": 0.00011731303904431175,
"loss": 0.6688,
"step": 794000
},
{
"epoch": 18.43,
"learning_rate": 0.00011684523707424172,
"loss": 0.6809,
"step": 796000
},
{
"epoch": 18.48,
"learning_rate": 0.00011637743510417169,
"loss": 0.6704,
"step": 798000
},
{
"epoch": 18.52,
"learning_rate": 0.00011590963313410166,
"loss": 0.6732,
"step": 800000
},
{
"epoch": 18.57,
"learning_rate": 0.00011544183116403164,
"loss": 0.6688,
"step": 802000
},
{
"epoch": 18.62,
"learning_rate": 0.00011497402919396161,
"loss": 0.6767,
"step": 804000
},
{
"epoch": 18.66,
"learning_rate": 0.00011450622722389158,
"loss": 0.6721,
"step": 806000
},
{
"epoch": 18.71,
"learning_rate": 0.00011403842525382154,
"loss": 0.6716,
"step": 808000
},
{
"epoch": 18.76,
"learning_rate": 0.00011357062328375151,
"loss": 0.673,
"step": 810000
},
{
"epoch": 18.8,
"learning_rate": 0.00011310282131368148,
"loss": 0.6717,
"step": 812000
},
{
"epoch": 18.85,
"learning_rate": 0.00011263501934361145,
"loss": 0.6607,
"step": 814000
},
{
"epoch": 18.9,
"learning_rate": 0.00011216721737354142,
"loss": 0.6732,
"step": 816000
},
{
"epoch": 18.94,
"learning_rate": 0.00011169941540347139,
"loss": 0.6715,
"step": 818000
},
{
"epoch": 18.99,
"learning_rate": 0.00011123161343340136,
"loss": 0.678,
"step": 820000
},
{
"epoch": 19.03,
"learning_rate": 0.00011076381146333133,
"loss": 0.6618,
"step": 822000
},
{
"epoch": 19.08,
"learning_rate": 0.0001102960094932613,
"loss": 0.6589,
"step": 824000
},
{
"epoch": 19.13,
"learning_rate": 0.00010982820752319126,
"loss": 0.6624,
"step": 826000
},
{
"epoch": 19.17,
"learning_rate": 0.00010936040555312123,
"loss": 0.6618,
"step": 828000
},
{
"epoch": 19.22,
"learning_rate": 0.0001088926035830512,
"loss": 0.6666,
"step": 830000
},
{
"epoch": 19.27,
"learning_rate": 0.00010842480161298117,
"loss": 0.6645,
"step": 832000
},
{
"epoch": 19.31,
"learning_rate": 0.00010795699964291114,
"loss": 0.6667,
"step": 834000
},
{
"epoch": 19.36,
"learning_rate": 0.00010748919767284111,
"loss": 0.6649,
"step": 836000
},
{
"epoch": 19.4,
"learning_rate": 0.0001070213957027711,
"loss": 0.659,
"step": 838000
},
{
"epoch": 19.45,
"learning_rate": 0.00010655359373270107,
"loss": 0.6611,
"step": 840000
},
{
"epoch": 19.5,
"learning_rate": 0.00010608579176263104,
"loss": 0.6565,
"step": 842000
},
{
"epoch": 19.54,
"learning_rate": 0.00010561798979256101,
"loss": 0.6631,
"step": 844000
},
{
"epoch": 19.59,
"learning_rate": 0.00010515018782249098,
"loss": 0.6593,
"step": 846000
},
{
"epoch": 19.64,
"learning_rate": 0.00010468238585242095,
"loss": 0.6654,
"step": 848000
},
{
"epoch": 19.68,
"learning_rate": 0.00010421458388235092,
"loss": 0.6621,
"step": 850000
},
{
"epoch": 19.73,
"learning_rate": 0.00010374678191228089,
"loss": 0.661,
"step": 852000
},
{
"epoch": 19.78,
"learning_rate": 0.00010327897994221086,
"loss": 0.6515,
"step": 854000
},
{
"epoch": 19.82,
"learning_rate": 0.00010281117797214082,
"loss": 0.6614,
"step": 856000
},
{
"epoch": 19.87,
"learning_rate": 0.0001023433760020708,
"loss": 0.6616,
"step": 858000
},
{
"epoch": 19.91,
"learning_rate": 0.00010187557403200076,
"loss": 0.6598,
"step": 860000
},
{
"epoch": 19.96,
"learning_rate": 0.00010140777206193073,
"loss": 0.6616,
"step": 862000
},
{
"epoch": 20.01,
"learning_rate": 0.0001009399700918607,
"loss": 0.6679,
"step": 864000
},
{
"epoch": 20.05,
"learning_rate": 0.00010047216812179067,
"loss": 0.6518,
"step": 866000
},
{
"epoch": 20.1,
"learning_rate": 0.00010000436615172064,
"loss": 0.6463,
"step": 868000
},
{
"epoch": 20.15,
"learning_rate": 9.95365641816506e-05,
"loss": 0.6529,
"step": 870000
},
{
"epoch": 20.19,
"learning_rate": 9.906876221158058e-05,
"loss": 0.6463,
"step": 872000
},
{
"epoch": 20.24,
"learning_rate": 9.860096024151056e-05,
"loss": 0.6545,
"step": 874000
},
{
"epoch": 20.28,
"learning_rate": 9.813315827144053e-05,
"loss": 0.6531,
"step": 876000
},
{
"epoch": 20.33,
"learning_rate": 9.76653563013705e-05,
"loss": 0.6442,
"step": 878000
},
{
"epoch": 20.38,
"learning_rate": 9.719755433130046e-05,
"loss": 0.65,
"step": 880000
},
{
"epoch": 20.42,
"learning_rate": 9.672975236123043e-05,
"loss": 0.6518,
"step": 882000
},
{
"epoch": 20.47,
"learning_rate": 9.62619503911604e-05,
"loss": 0.6546,
"step": 884000
},
{
"epoch": 20.52,
"learning_rate": 9.579414842109037e-05,
"loss": 0.6494,
"step": 886000
},
{
"epoch": 20.56,
"learning_rate": 9.532634645102035e-05,
"loss": 0.654,
"step": 888000
},
{
"epoch": 20.61,
"learning_rate": 9.485854448095032e-05,
"loss": 0.6536,
"step": 890000
},
{
"epoch": 20.66,
"learning_rate": 9.439074251088029e-05,
"loss": 0.6547,
"step": 892000
},
{
"epoch": 20.7,
"learning_rate": 9.392294054081026e-05,
"loss": 0.6421,
"step": 894000
},
{
"epoch": 20.75,
"learning_rate": 9.345513857074023e-05,
"loss": 0.6506,
"step": 896000
},
{
"epoch": 20.79,
"learning_rate": 9.29873366006702e-05,
"loss": 0.6551,
"step": 898000
},
{
"epoch": 20.84,
"learning_rate": 9.251953463060017e-05,
"loss": 0.6542,
"step": 900000
},
{
"epoch": 20.89,
"learning_rate": 9.205173266053014e-05,
"loss": 0.6398,
"step": 902000
},
{
"epoch": 20.93,
"learning_rate": 9.15839306904601e-05,
"loss": 0.653,
"step": 904000
},
{
"epoch": 20.98,
"learning_rate": 9.111612872039007e-05,
"loss": 0.6476,
"step": 906000
},
{
"epoch": 21.03,
"learning_rate": 9.064832675032004e-05,
"loss": 0.6378,
"step": 908000
},
{
"epoch": 21.07,
"learning_rate": 9.018052478025002e-05,
"loss": 0.6413,
"step": 910000
},
{
"epoch": 21.12,
"learning_rate": 8.971272281017999e-05,
"loss": 0.6368,
"step": 912000
},
{
"epoch": 21.16,
"learning_rate": 8.924492084010996e-05,
"loss": 0.6366,
"step": 914000
},
{
"epoch": 21.21,
"learning_rate": 8.877711887003993e-05,
"loss": 0.6455,
"step": 916000
},
{
"epoch": 21.26,
"learning_rate": 8.83093168999699e-05,
"loss": 0.6448,
"step": 918000
},
{
"epoch": 21.3,
"learning_rate": 8.784151492989987e-05,
"loss": 0.6371,
"step": 920000
},
{
"epoch": 21.35,
"learning_rate": 8.737371295982984e-05,
"loss": 0.6457,
"step": 922000
},
{
"epoch": 21.4,
"learning_rate": 8.69059109897598e-05,
"loss": 0.6399,
"step": 924000
},
{
"epoch": 21.44,
"learning_rate": 8.643810901968978e-05,
"loss": 0.6389,
"step": 926000
},
{
"epoch": 21.49,
"learning_rate": 8.597030704961974e-05,
"loss": 0.6444,
"step": 928000
},
{
"epoch": 21.54,
"learning_rate": 8.550250507954971e-05,
"loss": 0.6346,
"step": 930000
},
{
"epoch": 21.58,
"learning_rate": 8.503470310947968e-05,
"loss": 0.6394,
"step": 932000
},
{
"epoch": 21.63,
"learning_rate": 8.456690113940965e-05,
"loss": 0.6397,
"step": 934000
},
{
"epoch": 21.67,
"learning_rate": 8.409909916933962e-05,
"loss": 0.6411,
"step": 936000
},
{
"epoch": 21.72,
"learning_rate": 8.363129719926959e-05,
"loss": 0.6383,
"step": 938000
},
{
"epoch": 21.77,
"learning_rate": 8.316349522919956e-05,
"loss": 0.6416,
"step": 940000
},
{
"epoch": 21.81,
"learning_rate": 8.269569325912953e-05,
"loss": 0.635,
"step": 942000
},
{
"epoch": 21.86,
"learning_rate": 8.22278912890595e-05,
"loss": 0.6371,
"step": 944000
},
{
"epoch": 21.91,
"learning_rate": 8.176008931898949e-05,
"loss": 0.6412,
"step": 946000
},
{
"epoch": 21.95,
"learning_rate": 8.129228734891946e-05,
"loss": 0.6414,
"step": 948000
},
{
"epoch": 22.0,
"learning_rate": 8.082448537884943e-05,
"loss": 0.6285,
"step": 950000
},
{
"epoch": 22.04,
"learning_rate": 8.03566834087794e-05,
"loss": 0.6285,
"step": 952000
},
{
"epoch": 22.09,
"learning_rate": 7.988888143870937e-05,
"loss": 0.6268,
"step": 954000
},
{
"epoch": 22.14,
"learning_rate": 7.942107946863934e-05,
"loss": 0.6251,
"step": 956000
},
{
"epoch": 22.18,
"learning_rate": 7.89532774985693e-05,
"loss": 0.6306,
"step": 958000
},
{
"epoch": 22.23,
"learning_rate": 7.848547552849927e-05,
"loss": 0.6283,
"step": 960000
},
{
"epoch": 22.28,
"learning_rate": 7.801767355842924e-05,
"loss": 0.6264,
"step": 962000
},
{
"epoch": 22.32,
"learning_rate": 7.754987158835921e-05,
"loss": 0.6279,
"step": 964000
},
{
"epoch": 22.37,
"learning_rate": 7.708206961828918e-05,
"loss": 0.6272,
"step": 966000
},
{
"epoch": 22.41,
"learning_rate": 7.661426764821915e-05,
"loss": 0.6355,
"step": 968000
},
{
"epoch": 22.46,
"learning_rate": 7.614646567814912e-05,
"loss": 0.6349,
"step": 970000
},
{
"epoch": 22.51,
"learning_rate": 7.567866370807909e-05,
"loss": 0.6281,
"step": 972000
},
{
"epoch": 22.55,
"learning_rate": 7.521086173800905e-05,
"loss": 0.6269,
"step": 974000
},
{
"epoch": 22.6,
"learning_rate": 7.474305976793904e-05,
"loss": 0.6221,
"step": 976000
},
{
"epoch": 22.65,
"learning_rate": 7.4275257797869e-05,
"loss": 0.6295,
"step": 978000
},
{
"epoch": 22.69,
"learning_rate": 7.380745582779897e-05,
"loss": 0.6265,
"step": 980000
},
{
"epoch": 22.74,
"learning_rate": 7.333965385772894e-05,
"loss": 0.6203,
"step": 982000
},
{
"epoch": 22.79,
"learning_rate": 7.287185188765891e-05,
"loss": 0.6306,
"step": 984000
},
{
"epoch": 22.83,
"learning_rate": 7.240404991758888e-05,
"loss": 0.6319,
"step": 986000
},
{
"epoch": 22.88,
"learning_rate": 7.193624794751885e-05,
"loss": 0.6211,
"step": 988000
},
{
"epoch": 22.92,
"learning_rate": 7.146844597744882e-05,
"loss": 0.6244,
"step": 990000
},
{
"epoch": 22.97,
"learning_rate": 7.100064400737879e-05,
"loss": 0.6262,
"step": 992000
},
{
"epoch": 23.02,
"learning_rate": 7.053284203730876e-05,
"loss": 0.6166,
"step": 994000
},
{
"epoch": 23.06,
"learning_rate": 7.006504006723873e-05,
"loss": 0.6166,
"step": 996000
},
{
"epoch": 23.11,
"learning_rate": 6.95972380971687e-05,
"loss": 0.6175,
"step": 998000
},
{
"epoch": 23.16,
"learning_rate": 6.912943612709866e-05,
"loss": 0.6151,
"step": 1000000
},
{
"epoch": 23.2,
"learning_rate": 6.866163415702863e-05,
"loss": 0.6153,
"step": 1002000
},
{
"epoch": 23.25,
"learning_rate": 6.81938321869586e-05,
"loss": 0.6212,
"step": 1004000
},
{
"epoch": 23.29,
"learning_rate": 6.772603021688858e-05,
"loss": 0.6161,
"step": 1006000
},
{
"epoch": 23.34,
"learning_rate": 6.725822824681855e-05,
"loss": 0.6158,
"step": 1008000
},
{
"epoch": 23.39,
"learning_rate": 6.679042627674852e-05,
"loss": 0.6089,
"step": 1010000
},
{
"epoch": 23.43,
"learning_rate": 6.632262430667849e-05,
"loss": 0.6166,
"step": 1012000
},
{
"epoch": 23.48,
"learning_rate": 6.585482233660846e-05,
"loss": 0.6134,
"step": 1014000
},
{
"epoch": 23.53,
"learning_rate": 6.538702036653843e-05,
"loss": 0.6171,
"step": 1016000
},
{
"epoch": 23.57,
"learning_rate": 6.49192183964684e-05,
"loss": 0.6122,
"step": 1018000
},
{
"epoch": 23.62,
"learning_rate": 6.445141642639837e-05,
"loss": 0.6176,
"step": 1020000
},
{
"epoch": 23.67,
"learning_rate": 6.398361445632833e-05,
"loss": 0.6146,
"step": 1022000
},
{
"epoch": 23.71,
"learning_rate": 6.35158124862583e-05,
"loss": 0.6069,
"step": 1024000
},
{
"epoch": 23.76,
"learning_rate": 6.304801051618829e-05,
"loss": 0.6169,
"step": 1026000
},
{
"epoch": 23.8,
"learning_rate": 6.258020854611825e-05,
"loss": 0.6222,
"step": 1028000
},
{
"epoch": 23.85,
"learning_rate": 6.211240657604822e-05,
"loss": 0.6152,
"step": 1030000
},
{
"epoch": 23.9,
"learning_rate": 6.164460460597819e-05,
"loss": 0.6181,
"step": 1032000
},
{
"epoch": 23.94,
"learning_rate": 6.117680263590816e-05,
"loss": 0.6123,
"step": 1034000
},
{
"epoch": 23.99,
"learning_rate": 6.070900066583813e-05,
"loss": 0.619,
"step": 1036000
},
{
"epoch": 24.04,
"learning_rate": 6.02411986957681e-05,
"loss": 0.6099,
"step": 1038000
},
{
"epoch": 24.08,
"learning_rate": 5.977339672569807e-05,
"loss": 0.6098,
"step": 1040000
},
{
"epoch": 24.13,
"learning_rate": 5.930559475562804e-05,
"loss": 0.5965,
"step": 1042000
},
{
"epoch": 24.17,
"learning_rate": 5.883779278555802e-05,
"loss": 0.6059,
"step": 1044000
},
{
"epoch": 24.22,
"learning_rate": 5.836999081548799e-05,
"loss": 0.6021,
"step": 1046000
},
{
"epoch": 24.27,
"learning_rate": 5.790218884541796e-05,
"loss": 0.6093,
"step": 1048000
},
{
"epoch": 24.31,
"learning_rate": 5.7434386875347926e-05,
"loss": 0.6031,
"step": 1050000
},
{
"epoch": 24.36,
"learning_rate": 5.6966584905277895e-05,
"loss": 0.6053,
"step": 1052000
},
{
"epoch": 24.41,
"learning_rate": 5.6498782935207863e-05,
"loss": 0.6036,
"step": 1054000
},
{
"epoch": 24.45,
"learning_rate": 5.603098096513783e-05,
"loss": 0.6011,
"step": 1056000
},
{
"epoch": 24.5,
"learning_rate": 5.55631789950678e-05,
"loss": 0.6035,
"step": 1058000
},
{
"epoch": 24.55,
"learning_rate": 5.509537702499777e-05,
"loss": 0.6066,
"step": 1060000
},
{
"epoch": 24.59,
"learning_rate": 5.4627575054927746e-05,
"loss": 0.6061,
"step": 1062000
},
{
"epoch": 24.64,
"learning_rate": 5.4159773084857714e-05,
"loss": 0.6027,
"step": 1064000
},
{
"epoch": 24.68,
"learning_rate": 5.369197111478768e-05,
"loss": 0.6,
"step": 1066000
},
{
"epoch": 24.73,
"learning_rate": 5.322416914471765e-05,
"loss": 0.6062,
"step": 1068000
},
{
"epoch": 24.78,
"learning_rate": 5.275636717464762e-05,
"loss": 0.6003,
"step": 1070000
},
{
"epoch": 24.82,
"learning_rate": 5.228856520457759e-05,
"loss": 0.5988,
"step": 1072000
},
{
"epoch": 24.87,
"learning_rate": 5.182076323450756e-05,
"loss": 0.6096,
"step": 1074000
},
{
"epoch": 24.92,
"learning_rate": 5.135296126443753e-05,
"loss": 0.5988,
"step": 1076000
},
{
"epoch": 24.96,
"learning_rate": 5.08851592943675e-05,
"loss": 0.6086,
"step": 1078000
},
{
"epoch": 25.01,
"learning_rate": 5.041735732429748e-05,
"loss": 0.5942,
"step": 1080000
},
{
"epoch": 25.05,
"learning_rate": 4.994955535422745e-05,
"loss": 0.5954,
"step": 1082000
},
{
"epoch": 25.1,
"learning_rate": 4.9481753384157417e-05,
"loss": 0.5948,
"step": 1084000
},
{
"epoch": 25.15,
"learning_rate": 4.9013951414087385e-05,
"loss": 0.5946,
"step": 1086000
},
{
"epoch": 25.19,
"learning_rate": 4.8546149444017354e-05,
"loss": 0.5938,
"step": 1088000
},
{
"epoch": 25.24,
"learning_rate": 4.807834747394732e-05,
"loss": 0.5961,
"step": 1090000
},
{
"epoch": 25.29,
"learning_rate": 4.761054550387729e-05,
"loss": 0.5947,
"step": 1092000
},
{
"epoch": 25.33,
"learning_rate": 4.714274353380726e-05,
"loss": 0.6019,
"step": 1094000
},
{
"epoch": 25.38,
"learning_rate": 4.667494156373723e-05,
"loss": 0.5927,
"step": 1096000
},
{
"epoch": 25.43,
"learning_rate": 4.6207139593667205e-05,
"loss": 0.5921,
"step": 1098000
},
{
"epoch": 25.47,
"learning_rate": 4.5739337623597174e-05,
"loss": 0.5954,
"step": 1100000
},
{
"epoch": 25.52,
"learning_rate": 4.527153565352715e-05,
"loss": 0.5926,
"step": 1102000
},
{
"epoch": 25.56,
"learning_rate": 4.480373368345712e-05,
"loss": 0.5963,
"step": 1104000
},
{
"epoch": 25.61,
"learning_rate": 4.433593171338709e-05,
"loss": 0.5902,
"step": 1106000
},
{
"epoch": 25.66,
"learning_rate": 4.3868129743317056e-05,
"loss": 0.5952,
"step": 1108000
},
{
"epoch": 25.7,
"learning_rate": 4.3400327773247025e-05,
"loss": 0.5878,
"step": 1110000
},
{
"epoch": 25.75,
"learning_rate": 4.2932525803176994e-05,
"loss": 0.5926,
"step": 1112000
},
{
"epoch": 25.8,
"learning_rate": 4.246472383310696e-05,
"loss": 0.5854,
"step": 1114000
},
{
"epoch": 25.84,
"learning_rate": 4.199692186303694e-05,
"loss": 0.5916,
"step": 1116000
},
{
"epoch": 25.89,
"learning_rate": 4.152911989296691e-05,
"loss": 0.5869,
"step": 1118000
},
{
"epoch": 25.93,
"learning_rate": 4.1061317922896876e-05,
"loss": 0.5913,
"step": 1120000
},
{
"epoch": 25.98,
"learning_rate": 4.0593515952826845e-05,
"loss": 0.5822,
"step": 1122000
},
{
"epoch": 26.03,
"learning_rate": 4.0125713982756814e-05,
"loss": 0.5831,
"step": 1124000
},
{
"epoch": 26.07,
"learning_rate": 3.965791201268678e-05,
"loss": 0.5847,
"step": 1126000
},
{
"epoch": 26.12,
"learning_rate": 3.919011004261675e-05,
"loss": 0.5828,
"step": 1128000
},
{
"epoch": 26.17,
"learning_rate": 3.872230807254672e-05,
"loss": 0.5825,
"step": 1130000
},
{
"epoch": 26.21,
"learning_rate": 3.825450610247669e-05,
"loss": 0.5848,
"step": 1132000
},
{
"epoch": 26.26,
"learning_rate": 3.778670413240667e-05,
"loss": 0.5866,
"step": 1134000
},
{
"epoch": 26.31,
"learning_rate": 3.7318902162336634e-05,
"loss": 0.5832,
"step": 1136000
},
{
"epoch": 26.35,
"learning_rate": 3.685110019226661e-05,
"loss": 0.58,
"step": 1138000
},
{
"epoch": 26.4,
"learning_rate": 3.638329822219658e-05,
"loss": 0.5767,
"step": 1140000
},
{
"epoch": 26.44,
"learning_rate": 3.591549625212655e-05,
"loss": 0.5792,
"step": 1142000
},
{
"epoch": 26.49,
"learning_rate": 3.5447694282056516e-05,
"loss": 0.5764,
"step": 1144000
},
{
"epoch": 26.54,
"learning_rate": 3.4979892311986485e-05,
"loss": 0.5794,
"step": 1146000
},
{
"epoch": 26.58,
"learning_rate": 3.451209034191646e-05,
"loss": 0.5738,
"step": 1148000
},
{
"epoch": 26.63,
"learning_rate": 3.404428837184643e-05,
"loss": 0.5822,
"step": 1150000
},
{
"epoch": 26.68,
"learning_rate": 3.35764864017764e-05,
"loss": 0.5734,
"step": 1152000
},
{
"epoch": 26.72,
"learning_rate": 3.310868443170637e-05,
"loss": 0.5794,
"step": 1154000
},
{
"epoch": 26.77,
"learning_rate": 3.2640882461636336e-05,
"loss": 0.5853,
"step": 1156000
},
{
"epoch": 26.81,
"learning_rate": 3.2173080491566305e-05,
"loss": 0.5842,
"step": 1158000
},
{
"epoch": 26.86,
"learning_rate": 3.170527852149628e-05,
"loss": 0.5847,
"step": 1160000
},
{
"epoch": 26.91,
"learning_rate": 3.123747655142625e-05,
"loss": 0.5786,
"step": 1162000
},
{
"epoch": 26.95,
"learning_rate": 3.076967458135622e-05,
"loss": 0.5818,
"step": 1164000
},
{
"epoch": 27.0,
"learning_rate": 3.030187261128619e-05,
"loss": 0.5722,
"step": 1166000
},
{
"epoch": 27.05,
"learning_rate": 2.983407064121616e-05,
"loss": 0.5726,
"step": 1168000
},
{
"epoch": 27.09,
"learning_rate": 2.936626867114613e-05,
"loss": 0.5745,
"step": 1170000
},
{
"epoch": 27.14,
"learning_rate": 2.8898466701076097e-05,
"loss": 0.5655,
"step": 1172000
},
{
"epoch": 27.19,
"learning_rate": 2.843066473100607e-05,
"loss": 0.5747,
"step": 1174000
},
{
"epoch": 27.23,
"learning_rate": 2.7962862760936038e-05,
"loss": 0.5734,
"step": 1176000
},
{
"epoch": 27.28,
"learning_rate": 2.7495060790866007e-05,
"loss": 0.5752,
"step": 1178000
},
{
"epoch": 27.32,
"learning_rate": 2.7027258820795976e-05,
"loss": 0.5784,
"step": 1180000
},
{
"epoch": 27.37,
"learning_rate": 2.6559456850725945e-05,
"loss": 0.5667,
"step": 1182000
},
{
"epoch": 27.42,
"learning_rate": 2.609165488065592e-05,
"loss": 0.5748,
"step": 1184000
},
{
"epoch": 27.46,
"learning_rate": 2.562385291058589e-05,
"loss": 0.5762,
"step": 1186000
},
{
"epoch": 27.51,
"learning_rate": 2.5156050940515858e-05,
"loss": 0.5783,
"step": 1188000
},
{
"epoch": 27.56,
"learning_rate": 2.4688248970445827e-05,
"loss": 0.5668,
"step": 1190000
},
{
"epoch": 27.6,
"learning_rate": 2.42204470003758e-05,
"loss": 0.5671,
"step": 1192000
},
{
"epoch": 27.65,
"learning_rate": 2.3752645030305768e-05,
"loss": 0.5688,
"step": 1194000
},
{
"epoch": 27.69,
"learning_rate": 2.328484306023574e-05,
"loss": 0.5643,
"step": 1196000
},
{
"epoch": 27.74,
"learning_rate": 2.281704109016571e-05,
"loss": 0.5688,
"step": 1198000
},
{
"epoch": 27.79,
"learning_rate": 2.2349239120095678e-05,
"loss": 0.5651,
"step": 1200000
},
{
"epoch": 27.83,
"learning_rate": 2.188143715002565e-05,
"loss": 0.5705,
"step": 1202000
},
{
"epoch": 27.88,
"learning_rate": 2.141363517995562e-05,
"loss": 0.5684,
"step": 1204000
},
{
"epoch": 27.93,
"learning_rate": 2.0945833209885588e-05,
"loss": 0.567,
"step": 1206000
},
{
"epoch": 27.97,
"learning_rate": 2.0478031239815557e-05,
"loss": 0.5711,
"step": 1208000
},
{
"epoch": 28.02,
"learning_rate": 2.0010229269745533e-05,
"loss": 0.5684,
"step": 1210000
},
{
"epoch": 28.06,
"learning_rate": 1.95424272996755e-05,
"loss": 0.5604,
"step": 1212000
},
{
"epoch": 28.11,
"learning_rate": 1.907462532960547e-05,
"loss": 0.5615,
"step": 1214000
},
{
"epoch": 28.16,
"learning_rate": 1.860682335953544e-05,
"loss": 0.5679,
"step": 1216000
},
{
"epoch": 28.2,
"learning_rate": 1.813902138946541e-05,
"loss": 0.5644,
"step": 1218000
},
{
"epoch": 28.25,
"learning_rate": 1.767121941939538e-05,
"loss": 0.5663,
"step": 1220000
},
{
"epoch": 28.3,
"learning_rate": 1.720341744932535e-05,
"loss": 0.5584,
"step": 1222000
},
{
"epoch": 28.34,
"learning_rate": 1.6735615479255318e-05,
"loss": 0.558,
"step": 1224000
},
{
"epoch": 28.39,
"learning_rate": 1.626781350918529e-05,
"loss": 0.5575,
"step": 1226000
},
{
"epoch": 28.44,
"learning_rate": 1.580001153911526e-05,
"loss": 0.5728,
"step": 1228000
},
{
"epoch": 28.48,
"learning_rate": 1.533220956904523e-05,
"loss": 0.5653,
"step": 1230000
},
{
"epoch": 28.53,
"learning_rate": 1.48644075989752e-05,
"loss": 0.5603,
"step": 1232000
},
{
"epoch": 28.57,
"learning_rate": 1.4396605628905172e-05,
"loss": 0.5613,
"step": 1234000
},
{
"epoch": 28.62,
"learning_rate": 1.3928803658835141e-05,
"loss": 0.5563,
"step": 1236000
},
{
"epoch": 28.67,
"learning_rate": 1.346100168876511e-05,
"loss": 0.5705,
"step": 1238000
},
{
"epoch": 28.71,
"learning_rate": 1.299319971869508e-05,
"loss": 0.5568,
"step": 1240000
},
{
"epoch": 28.76,
"learning_rate": 1.252539774862505e-05,
"loss": 0.5517,
"step": 1242000
},
{
"epoch": 28.81,
"learning_rate": 1.2057595778555022e-05,
"loss": 0.5647,
"step": 1244000
},
{
"epoch": 28.85,
"learning_rate": 1.158979380848499e-05,
"loss": 0.5551,
"step": 1246000
},
{
"epoch": 28.9,
"learning_rate": 1.1121991838414961e-05,
"loss": 0.5598,
"step": 1248000
},
{
"epoch": 28.94,
"learning_rate": 1.0654189868344932e-05,
"loss": 0.562,
"step": 1250000
},
{
"epoch": 28.99,
"learning_rate": 1.0186387898274902e-05,
"loss": 0.5563,
"step": 1252000
}
],
"logging_steps": 2000,
"max_steps": 1295550,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2.559806921525875e+21,
"trial_name": null,
"trial_params": null
}