{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.99965266119415, "eval_steps": 500, "global_step": 1295550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.6310589688175354e-05, "loss": 4.5055, "step": 2000 }, { "epoch": 0.09, "learning_rate": 9.262117937635071e-05, "loss": 1.4851, "step": 4000 }, { "epoch": 0.14, "learning_rate": 0.00013893176906452608, "loss": 1.1808, "step": 6000 }, { "epoch": 0.19, "learning_rate": 0.00018524235875270141, "loss": 1.132, "step": 8000 }, { "epoch": 0.23, "learning_rate": 0.0002315529484408768, "loss": 1.1349, "step": 10000 }, { "epoch": 0.28, "learning_rate": 0.00027786353812905216, "loss": 1.1422, "step": 12000 }, { "epoch": 0.32, "learning_rate": 0.0002997558073716234, "loss": 1.1513, "step": 14000 }, { "epoch": 0.37, "learning_rate": 0.0002992880054015534, "loss": 1.142, "step": 16000 }, { "epoch": 0.42, "learning_rate": 0.00029882020343148334, "loss": 1.1303, "step": 18000 }, { "epoch": 0.46, "learning_rate": 0.0002983524014614133, "loss": 1.1215, "step": 20000 }, { "epoch": 0.51, "learning_rate": 0.0002978845994913433, "loss": 1.109, "step": 22000 }, { "epoch": 0.56, "learning_rate": 0.00029741679752127325, "loss": 1.1034, "step": 24000 }, { "epoch": 0.6, "learning_rate": 0.0002969489955512032, "loss": 1.0998, "step": 26000 }, { "epoch": 0.65, "learning_rate": 0.00029648119358113324, "loss": 1.0979, "step": 28000 }, { "epoch": 0.69, "learning_rate": 0.00029601339161106316, "loss": 1.083, "step": 30000 }, { "epoch": 0.74, "learning_rate": 0.0002955455896409932, "loss": 1.0741, "step": 32000 }, { "epoch": 0.79, "learning_rate": 0.0002950777876709231, "loss": 1.0741, "step": 34000 }, { "epoch": 0.83, "learning_rate": 0.0002946099857008531, "loss": 1.0707, "step": 36000 }, { "epoch": 0.88, "learning_rate": 0.00029414218373078303, "loss": 1.0689, "step": 38000 }, { "epoch": 0.93, "learning_rate": 0.00029367438176071306, "loss": 1.0536, "step": 40000 }, { "epoch": 0.97, "learning_rate": 0.00029320657979064297, "loss": 1.0556, "step": 42000 }, { "epoch": 1.02, "learning_rate": 0.000292738777820573, "loss": 1.0358, "step": 44000 }, { "epoch": 1.07, "learning_rate": 0.0002922709758505029, "loss": 1.018, "step": 46000 }, { "epoch": 1.11, "learning_rate": 0.00029180317388043293, "loss": 1.0162, "step": 48000 }, { "epoch": 1.16, "learning_rate": 0.00029133537191036285, "loss": 1.017, "step": 50000 }, { "epoch": 1.2, "learning_rate": 0.00029086756994029287, "loss": 1.0145, "step": 52000 }, { "epoch": 1.25, "learning_rate": 0.0002903997679702228, "loss": 1.0103, "step": 54000 }, { "epoch": 1.3, "learning_rate": 0.0002899319660001528, "loss": 1.0046, "step": 56000 }, { "epoch": 1.34, "learning_rate": 0.0002894641640300827, "loss": 1.01, "step": 58000 }, { "epoch": 1.39, "learning_rate": 0.00028899636206001274, "loss": 1.0146, "step": 60000 }, { "epoch": 1.44, "learning_rate": 0.00028852856008994266, "loss": 1.0122, "step": 62000 }, { "epoch": 1.48, "learning_rate": 0.0002880607581198727, "loss": 0.9961, "step": 64000 }, { "epoch": 1.53, "learning_rate": 0.00028759295614980265, "loss": 0.9914, "step": 66000 }, { "epoch": 1.57, "learning_rate": 0.0002871251541797326, "loss": 0.9936, "step": 68000 }, { "epoch": 1.62, "learning_rate": 0.0002866573522096626, "loss": 0.9998, "step": 70000 }, { "epoch": 1.67, "learning_rate": 0.00028618955023959256, "loss": 0.9833, "step": 72000 }, { "epoch": 1.71, "learning_rate": 0.0002857217482695225, "loss": 0.9901, "step": 74000 }, { "epoch": 1.76, "learning_rate": 0.0002852539462994525, "loss": 0.9905, "step": 76000 }, { "epoch": 1.81, "learning_rate": 0.00028478614432938246, "loss": 0.9845, "step": 78000 }, { "epoch": 1.85, "learning_rate": 0.00028431834235931243, "loss": 0.9886, "step": 80000 }, { "epoch": 1.9, "learning_rate": 0.0002838505403892424, "loss": 0.9848, "step": 82000 }, { "epoch": 1.95, "learning_rate": 0.00028338273841917237, "loss": 0.9827, "step": 84000 }, { "epoch": 1.99, "learning_rate": 0.00028291493644910234, "loss": 0.9764, "step": 86000 }, { "epoch": 2.04, "learning_rate": 0.0002824471344790323, "loss": 0.9546, "step": 88000 }, { "epoch": 2.08, "learning_rate": 0.0002819793325089623, "loss": 0.9388, "step": 90000 }, { "epoch": 2.13, "learning_rate": 0.00028151153053889225, "loss": 0.9432, "step": 92000 }, { "epoch": 2.18, "learning_rate": 0.0002810437285688222, "loss": 0.9591, "step": 94000 }, { "epoch": 2.22, "learning_rate": 0.0002805759265987522, "loss": 0.9431, "step": 96000 }, { "epoch": 2.27, "learning_rate": 0.00028010812462868215, "loss": 0.9388, "step": 98000 }, { "epoch": 2.32, "learning_rate": 0.0002796403226586121, "loss": 0.9421, "step": 100000 }, { "epoch": 2.36, "learning_rate": 0.0002791725206885421, "loss": 0.9452, "step": 102000 }, { "epoch": 2.41, "learning_rate": 0.0002787047187184721, "loss": 0.9535, "step": 104000 }, { "epoch": 2.45, "learning_rate": 0.00027823691674840203, "loss": 0.9373, "step": 106000 }, { "epoch": 2.5, "learning_rate": 0.00027776911477833205, "loss": 0.9345, "step": 108000 }, { "epoch": 2.55, "learning_rate": 0.00027730131280826197, "loss": 0.9478, "step": 110000 }, { "epoch": 2.59, "learning_rate": 0.000276833510838192, "loss": 0.9445, "step": 112000 }, { "epoch": 2.64, "learning_rate": 0.0002763657088681219, "loss": 0.9297, "step": 114000 }, { "epoch": 2.69, "learning_rate": 0.0002758979068980519, "loss": 0.9294, "step": 116000 }, { "epoch": 2.73, "learning_rate": 0.00027543010492798184, "loss": 0.9273, "step": 118000 }, { "epoch": 2.78, "learning_rate": 0.00027496230295791186, "loss": 0.9242, "step": 120000 }, { "epoch": 2.83, "learning_rate": 0.0002744945009878418, "loss": 0.9228, "step": 122000 }, { "epoch": 2.87, "learning_rate": 0.0002740266990177718, "loss": 0.9316, "step": 124000 }, { "epoch": 2.92, "learning_rate": 0.0002735588970477017, "loss": 0.9448, "step": 126000 }, { "epoch": 2.96, "learning_rate": 0.00027309109507763174, "loss": 0.9253, "step": 128000 }, { "epoch": 3.01, "learning_rate": 0.00027262329310756165, "loss": 0.9197, "step": 130000 }, { "epoch": 3.06, "learning_rate": 0.0002721554911374917, "loss": 0.9025, "step": 132000 }, { "epoch": 3.1, "learning_rate": 0.0002716876891674216, "loss": 0.8983, "step": 134000 }, { "epoch": 3.15, "learning_rate": 0.0002712198871973516, "loss": 0.9013, "step": 136000 }, { "epoch": 3.2, "learning_rate": 0.0002707520852272816, "loss": 0.9019, "step": 138000 }, { "epoch": 3.24, "learning_rate": 0.00027028428325721155, "loss": 0.9016, "step": 140000 }, { "epoch": 3.29, "learning_rate": 0.0002698164812871415, "loss": 0.8979, "step": 142000 }, { "epoch": 3.33, "learning_rate": 0.0002693486793170715, "loss": 0.8961, "step": 144000 }, { "epoch": 3.38, "learning_rate": 0.00026888087734700146, "loss": 0.9007, "step": 146000 }, { "epoch": 3.43, "learning_rate": 0.00026841307537693143, "loss": 0.8951, "step": 148000 }, { "epoch": 3.47, "learning_rate": 0.0002679452734068614, "loss": 0.8926, "step": 150000 }, { "epoch": 3.52, "learning_rate": 0.00026747747143679137, "loss": 0.8924, "step": 152000 }, { "epoch": 3.57, "learning_rate": 0.00026700966946672134, "loss": 0.9044, "step": 154000 }, { "epoch": 3.61, "learning_rate": 0.0002665418674966513, "loss": 0.8952, "step": 156000 }, { "epoch": 3.66, "learning_rate": 0.00026607406552658127, "loss": 0.9001, "step": 158000 }, { "epoch": 3.7, "learning_rate": 0.00026560626355651124, "loss": 0.8898, "step": 160000 }, { "epoch": 3.75, "learning_rate": 0.0002651384615864412, "loss": 0.895, "step": 162000 }, { "epoch": 3.8, "learning_rate": 0.0002646706596163712, "loss": 0.9015, "step": 164000 }, { "epoch": 3.84, "learning_rate": 0.00026420285764630115, "loss": 0.892, "step": 166000 }, { "epoch": 3.89, "learning_rate": 0.0002637350556762311, "loss": 0.8903, "step": 168000 }, { "epoch": 3.94, "learning_rate": 0.0002632672537061611, "loss": 0.8916, "step": 170000 }, { "epoch": 3.98, "learning_rate": 0.00026279945173609105, "loss": 0.8941, "step": 172000 }, { "epoch": 4.03, "learning_rate": 0.000262331649766021, "loss": 0.8771, "step": 174000 }, { "epoch": 4.08, "learning_rate": 0.000261863847795951, "loss": 0.8716, "step": 176000 }, { "epoch": 4.12, "learning_rate": 0.00026139604582588096, "loss": 0.8632, "step": 178000 }, { "epoch": 4.17, "learning_rate": 0.00026092824385581093, "loss": 0.8573, "step": 180000 }, { "epoch": 4.21, "learning_rate": 0.0002604604418857409, "loss": 0.8642, "step": 182000 }, { "epoch": 4.26, "learning_rate": 0.00025999263991567087, "loss": 0.8642, "step": 184000 }, { "epoch": 4.31, "learning_rate": 0.00025952483794560084, "loss": 0.8617, "step": 186000 }, { "epoch": 4.35, "learning_rate": 0.0002590570359755308, "loss": 0.8574, "step": 188000 }, { "epoch": 4.4, "learning_rate": 0.0002585892340054608, "loss": 0.8612, "step": 190000 }, { "epoch": 4.45, "learning_rate": 0.00025812143203539074, "loss": 0.8706, "step": 192000 }, { "epoch": 4.49, "learning_rate": 0.0002576536300653207, "loss": 0.8605, "step": 194000 }, { "epoch": 4.54, "learning_rate": 0.0002571858280952507, "loss": 0.8703, "step": 196000 }, { "epoch": 4.58, "learning_rate": 0.00025671802612518065, "loss": 0.8691, "step": 198000 }, { "epoch": 4.63, "learning_rate": 0.0002562502241551106, "loss": 0.873, "step": 200000 }, { "epoch": 4.68, "learning_rate": 0.0002557824221850406, "loss": 0.857, "step": 202000 }, { "epoch": 4.72, "learning_rate": 0.00025531462021497056, "loss": 0.8686, "step": 204000 }, { "epoch": 4.77, "learning_rate": 0.0002548468182449005, "loss": 0.868, "step": 206000 }, { "epoch": 4.82, "learning_rate": 0.0002543790162748305, "loss": 0.8756, "step": 208000 }, { "epoch": 4.86, "learning_rate": 0.0002539112143047605, "loss": 0.8656, "step": 210000 }, { "epoch": 4.91, "learning_rate": 0.00025344341233469043, "loss": 0.8725, "step": 212000 }, { "epoch": 4.96, "learning_rate": 0.00025297561036462045, "loss": 0.8675, "step": 214000 }, { "epoch": 5.0, "learning_rate": 0.00025250780839455037, "loss": 0.8644, "step": 216000 }, { "epoch": 5.05, "learning_rate": 0.0002520400064244804, "loss": 0.8397, "step": 218000 }, { "epoch": 5.09, "learning_rate": 0.0002515722044544103, "loss": 0.8374, "step": 220000 }, { "epoch": 5.14, "learning_rate": 0.00025110440248434033, "loss": 0.8364, "step": 222000 }, { "epoch": 5.19, "learning_rate": 0.00025063660051427025, "loss": 0.8454, "step": 224000 }, { "epoch": 5.23, "learning_rate": 0.00025016879854420027, "loss": 0.8367, "step": 226000 }, { "epoch": 5.28, "learning_rate": 0.0002497009965741302, "loss": 0.8275, "step": 228000 }, { "epoch": 5.33, "learning_rate": 0.0002492331946040602, "loss": 0.8398, "step": 230000 }, { "epoch": 5.37, "learning_rate": 0.0002487653926339901, "loss": 0.8403, "step": 232000 }, { "epoch": 5.42, "learning_rate": 0.00024829759066392014, "loss": 0.8409, "step": 234000 }, { "epoch": 5.46, "learning_rate": 0.00024782978869385006, "loss": 0.8366, "step": 236000 }, { "epoch": 5.51, "learning_rate": 0.0002473619867237801, "loss": 0.8381, "step": 238000 }, { "epoch": 5.56, "learning_rate": 0.00024689418475371, "loss": 0.842, "step": 240000 }, { "epoch": 5.6, "learning_rate": 0.00024642638278364, "loss": 0.843, "step": 242000 }, { "epoch": 5.65, "learning_rate": 0.00024595858081357, "loss": 0.8489, "step": 244000 }, { "epoch": 5.7, "learning_rate": 0.00024549077884349996, "loss": 0.8313, "step": 246000 }, { "epoch": 5.74, "learning_rate": 0.0002450229768734299, "loss": 0.8468, "step": 248000 }, { "epoch": 5.79, "learning_rate": 0.0002445551749033599, "loss": 0.8446, "step": 250000 }, { "epoch": 5.84, "learning_rate": 0.00024408737293328986, "loss": 0.8329, "step": 252000 }, { "epoch": 5.88, "learning_rate": 0.00024361957096321983, "loss": 0.8402, "step": 254000 }, { "epoch": 5.93, "learning_rate": 0.0002431517689931498, "loss": 0.8502, "step": 256000 }, { "epoch": 5.97, "learning_rate": 0.00024268396702307977, "loss": 0.8437, "step": 258000 }, { "epoch": 6.02, "learning_rate": 0.00024221616505300974, "loss": 0.8341, "step": 260000 }, { "epoch": 6.07, "learning_rate": 0.0002417483630829397, "loss": 0.8152, "step": 262000 }, { "epoch": 6.11, "learning_rate": 0.00024128056111286968, "loss": 0.8189, "step": 264000 }, { "epoch": 6.16, "learning_rate": 0.00024081275914279965, "loss": 0.8193, "step": 266000 }, { "epoch": 6.21, "learning_rate": 0.00024034495717272961, "loss": 0.825, "step": 268000 }, { "epoch": 6.25, "learning_rate": 0.00023987715520265958, "loss": 0.818, "step": 270000 }, { "epoch": 6.3, "learning_rate": 0.00023940935323258955, "loss": 0.8204, "step": 272000 }, { "epoch": 6.34, "learning_rate": 0.00023894155126251952, "loss": 0.823, "step": 274000 }, { "epoch": 6.39, "learning_rate": 0.0002384737492924495, "loss": 0.8179, "step": 276000 }, { "epoch": 6.44, "learning_rate": 0.00023800594732237946, "loss": 0.8152, "step": 278000 }, { "epoch": 6.48, "learning_rate": 0.00023753814535230943, "loss": 0.8178, "step": 280000 }, { "epoch": 6.53, "learning_rate": 0.00023707034338223942, "loss": 0.8212, "step": 282000 }, { "epoch": 6.58, "learning_rate": 0.00023660254141216937, "loss": 0.8161, "step": 284000 }, { "epoch": 6.62, "learning_rate": 0.00023613473944209936, "loss": 0.8162, "step": 286000 }, { "epoch": 6.67, "learning_rate": 0.0002356669374720293, "loss": 0.828, "step": 288000 }, { "epoch": 6.72, "learning_rate": 0.0002351991355019593, "loss": 0.823, "step": 290000 }, { "epoch": 6.76, "learning_rate": 0.00023473133353188927, "loss": 0.8156, "step": 292000 }, { "epoch": 6.81, "learning_rate": 0.00023426353156181924, "loss": 0.8176, "step": 294000 }, { "epoch": 6.85, "learning_rate": 0.0002337957295917492, "loss": 0.8226, "step": 296000 }, { "epoch": 6.9, "learning_rate": 0.00023332792762167917, "loss": 0.8189, "step": 298000 }, { "epoch": 6.95, "learning_rate": 0.00023286012565160914, "loss": 0.8082, "step": 300000 }, { "epoch": 6.99, "learning_rate": 0.0002323923236815391, "loss": 0.8257, "step": 302000 }, { "epoch": 7.04, "learning_rate": 0.00023192452171146908, "loss": 0.7925, "step": 304000 }, { "epoch": 7.09, "learning_rate": 0.00023145671974139905, "loss": 0.7902, "step": 306000 }, { "epoch": 7.13, "learning_rate": 0.00023098891777132902, "loss": 0.7994, "step": 308000 }, { "epoch": 7.18, "learning_rate": 0.000230521115801259, "loss": 0.8029, "step": 310000 }, { "epoch": 7.22, "learning_rate": 0.00023005331383118896, "loss": 0.7983, "step": 312000 }, { "epoch": 7.27, "learning_rate": 0.00022958551186111893, "loss": 0.797, "step": 314000 }, { "epoch": 7.32, "learning_rate": 0.00022911770989104892, "loss": 0.8009, "step": 316000 }, { "epoch": 7.36, "learning_rate": 0.00022864990792097886, "loss": 0.7979, "step": 318000 }, { "epoch": 7.41, "learning_rate": 0.00022818210595090886, "loss": 0.8055, "step": 320000 }, { "epoch": 7.46, "learning_rate": 0.0002277143039808388, "loss": 0.7971, "step": 322000 }, { "epoch": 7.5, "learning_rate": 0.0002272465020107688, "loss": 0.8066, "step": 324000 }, { "epoch": 7.55, "learning_rate": 0.00022677870004069874, "loss": 0.7975, "step": 326000 }, { "epoch": 7.6, "learning_rate": 0.00022631089807062873, "loss": 0.7912, "step": 328000 }, { "epoch": 7.64, "learning_rate": 0.00022584309610055868, "loss": 0.7988, "step": 330000 }, { "epoch": 7.69, "learning_rate": 0.00022537529413048867, "loss": 0.7999, "step": 332000 }, { "epoch": 7.73, "learning_rate": 0.00022490749216041861, "loss": 0.8019, "step": 334000 }, { "epoch": 7.78, "learning_rate": 0.0002244396901903486, "loss": 0.8108, "step": 336000 }, { "epoch": 7.83, "learning_rate": 0.00022397188822027855, "loss": 0.8075, "step": 338000 }, { "epoch": 7.87, "learning_rate": 0.00022350408625020855, "loss": 0.7995, "step": 340000 }, { "epoch": 7.92, "learning_rate": 0.0002230362842801385, "loss": 0.802, "step": 342000 }, { "epoch": 7.97, "learning_rate": 0.00022256848231006848, "loss": 0.7998, "step": 344000 }, { "epoch": 8.01, "learning_rate": 0.00022210068033999843, "loss": 0.8023, "step": 346000 }, { "epoch": 8.06, "learning_rate": 0.00022163287836992842, "loss": 0.7748, "step": 348000 }, { "epoch": 8.1, "learning_rate": 0.00022116507639985836, "loss": 0.7839, "step": 350000 }, { "epoch": 8.15, "learning_rate": 0.00022069727442978836, "loss": 0.7806, "step": 352000 }, { "epoch": 8.2, "learning_rate": 0.00022022947245971836, "loss": 0.7775, "step": 354000 }, { "epoch": 8.24, "learning_rate": 0.0002197616704896483, "loss": 0.7734, "step": 356000 }, { "epoch": 8.29, "learning_rate": 0.0002192938685195783, "loss": 0.7728, "step": 358000 }, { "epoch": 8.34, "learning_rate": 0.00021882606654950824, "loss": 0.7879, "step": 360000 }, { "epoch": 8.38, "learning_rate": 0.00021835826457943823, "loss": 0.7891, "step": 362000 }, { "epoch": 8.43, "learning_rate": 0.00021789046260936817, "loss": 0.7922, "step": 364000 }, { "epoch": 8.48, "learning_rate": 0.00021742266063929817, "loss": 0.7837, "step": 366000 }, { "epoch": 8.52, "learning_rate": 0.0002169548586692281, "loss": 0.7838, "step": 368000 }, { "epoch": 8.57, "learning_rate": 0.0002164870566991581, "loss": 0.7797, "step": 370000 }, { "epoch": 8.61, "learning_rate": 0.00021601925472908805, "loss": 0.7818, "step": 372000 }, { "epoch": 8.66, "learning_rate": 0.00021555145275901804, "loss": 0.7838, "step": 374000 }, { "epoch": 8.71, "learning_rate": 0.000215083650788948, "loss": 0.7828, "step": 376000 }, { "epoch": 8.75, "learning_rate": 0.00021461584881887798, "loss": 0.7822, "step": 378000 }, { "epoch": 8.8, "learning_rate": 0.00021414804684880792, "loss": 0.7952, "step": 380000 }, { "epoch": 8.85, "learning_rate": 0.00021368024487873792, "loss": 0.7888, "step": 382000 }, { "epoch": 8.89, "learning_rate": 0.00021321244290866786, "loss": 0.7813, "step": 384000 }, { "epoch": 8.94, "learning_rate": 0.00021274464093859786, "loss": 0.7784, "step": 386000 }, { "epoch": 8.98, "learning_rate": 0.00021227683896852783, "loss": 0.776, "step": 388000 }, { "epoch": 9.03, "learning_rate": 0.0002118090369984578, "loss": 0.7686, "step": 390000 }, { "epoch": 9.08, "learning_rate": 0.0002113412350283878, "loss": 0.7576, "step": 392000 }, { "epoch": 9.12, "learning_rate": 0.00021087343305831773, "loss": 0.7611, "step": 394000 }, { "epoch": 9.17, "learning_rate": 0.00021040563108824773, "loss": 0.7698, "step": 396000 }, { "epoch": 9.22, "learning_rate": 0.00020993782911817767, "loss": 0.7632, "step": 398000 }, { "epoch": 9.26, "learning_rate": 0.00020947002714810767, "loss": 0.7725, "step": 400000 }, { "epoch": 9.31, "learning_rate": 0.0002090022251780376, "loss": 0.7706, "step": 402000 }, { "epoch": 9.35, "learning_rate": 0.0002085344232079676, "loss": 0.7709, "step": 404000 }, { "epoch": 9.4, "learning_rate": 0.00020806662123789755, "loss": 0.7651, "step": 406000 }, { "epoch": 9.45, "learning_rate": 0.00020759881926782754, "loss": 0.7657, "step": 408000 }, { "epoch": 9.49, "learning_rate": 0.00020713101729775748, "loss": 0.7589, "step": 410000 }, { "epoch": 9.54, "learning_rate": 0.00020666321532768748, "loss": 0.7683, "step": 412000 }, { "epoch": 9.59, "learning_rate": 0.00020619541335761742, "loss": 0.7684, "step": 414000 }, { "epoch": 9.63, "learning_rate": 0.00020572761138754742, "loss": 0.7756, "step": 416000 }, { "epoch": 9.68, "learning_rate": 0.00020525980941747736, "loss": 0.7653, "step": 418000 }, { "epoch": 9.73, "learning_rate": 0.00020479200744740736, "loss": 0.7718, "step": 420000 }, { "epoch": 9.77, "learning_rate": 0.0002043242054773373, "loss": 0.7676, "step": 422000 }, { "epoch": 9.82, "learning_rate": 0.0002038564035072673, "loss": 0.772, "step": 424000 }, { "epoch": 9.86, "learning_rate": 0.00020338860153719726, "loss": 0.766, "step": 426000 }, { "epoch": 9.91, "learning_rate": 0.00020292079956712723, "loss": 0.7739, "step": 428000 }, { "epoch": 9.96, "learning_rate": 0.0002024529975970572, "loss": 0.7743, "step": 430000 }, { "epoch": 10.0, "learning_rate": 0.00020198519562698717, "loss": 0.7719, "step": 432000 }, { "epoch": 10.05, "learning_rate": 0.00020151739365691714, "loss": 0.7443, "step": 434000 }, { "epoch": 10.1, "learning_rate": 0.0002010495916868471, "loss": 0.7573, "step": 436000 }, { "epoch": 10.14, "learning_rate": 0.00020058178971677708, "loss": 0.7546, "step": 438000 }, { "epoch": 10.19, "learning_rate": 0.00020011398774670704, "loss": 0.7516, "step": 440000 }, { "epoch": 10.23, "learning_rate": 0.000199646185776637, "loss": 0.7444, "step": 442000 }, { "epoch": 10.28, "learning_rate": 0.00019917838380656698, "loss": 0.7656, "step": 444000 }, { "epoch": 10.33, "learning_rate": 0.00019871058183649695, "loss": 0.7452, "step": 446000 }, { "epoch": 10.37, "learning_rate": 0.00019824277986642692, "loss": 0.7555, "step": 448000 }, { "epoch": 10.42, "learning_rate": 0.0001977749778963569, "loss": 0.7486, "step": 450000 }, { "epoch": 10.47, "learning_rate": 0.00019730717592628686, "loss": 0.7509, "step": 452000 }, { "epoch": 10.51, "learning_rate": 0.00019683937395621683, "loss": 0.7484, "step": 454000 }, { "epoch": 10.56, "learning_rate": 0.0001963715719861468, "loss": 0.7554, "step": 456000 }, { "epoch": 10.61, "learning_rate": 0.00019590377001607676, "loss": 0.7557, "step": 458000 }, { "epoch": 10.65, "learning_rate": 0.00019543596804600676, "loss": 0.7603, "step": 460000 }, { "epoch": 10.7, "learning_rate": 0.00019496816607593673, "loss": 0.7577, "step": 462000 }, { "epoch": 10.74, "learning_rate": 0.0001945003641058667, "loss": 0.7641, "step": 464000 }, { "epoch": 10.79, "learning_rate": 0.00019403256213579667, "loss": 0.7648, "step": 466000 }, { "epoch": 10.84, "learning_rate": 0.00019356476016572664, "loss": 0.755, "step": 468000 }, { "epoch": 10.88, "learning_rate": 0.0001930969581956566, "loss": 0.7445, "step": 470000 }, { "epoch": 10.93, "learning_rate": 0.00019262915622558657, "loss": 0.7614, "step": 472000 }, { "epoch": 10.98, "learning_rate": 0.00019216135425551654, "loss": 0.7526, "step": 474000 }, { "epoch": 11.02, "learning_rate": 0.0001916935522854465, "loss": 0.7493, "step": 476000 }, { "epoch": 11.07, "learning_rate": 0.00019122575031537648, "loss": 0.7299, "step": 478000 }, { "epoch": 11.11, "learning_rate": 0.00019075794834530645, "loss": 0.7379, "step": 480000 }, { "epoch": 11.16, "learning_rate": 0.00019029014637523642, "loss": 0.7365, "step": 482000 }, { "epoch": 11.21, "learning_rate": 0.00018982234440516639, "loss": 0.7402, "step": 484000 }, { "epoch": 11.25, "learning_rate": 0.00018935454243509636, "loss": 0.7409, "step": 486000 }, { "epoch": 11.3, "learning_rate": 0.00018888674046502632, "loss": 0.7294, "step": 488000 }, { "epoch": 11.35, "learning_rate": 0.0001884189384949563, "loss": 0.7467, "step": 490000 }, { "epoch": 11.39, "learning_rate": 0.00018795113652488626, "loss": 0.7357, "step": 492000 }, { "epoch": 11.44, "learning_rate": 0.00018748333455481623, "loss": 0.744, "step": 494000 }, { "epoch": 11.49, "learning_rate": 0.0001870155325847462, "loss": 0.741, "step": 496000 }, { "epoch": 11.53, "learning_rate": 0.0001865477306146762, "loss": 0.7404, "step": 498000 }, { "epoch": 11.58, "learning_rate": 0.00018607992864460614, "loss": 0.749, "step": 500000 }, { "epoch": 11.62, "learning_rate": 0.00018561212667453613, "loss": 0.7388, "step": 502000 }, { "epoch": 11.67, "learning_rate": 0.00018514432470446607, "loss": 0.742, "step": 504000 }, { "epoch": 11.72, "learning_rate": 0.00018467652273439607, "loss": 0.7481, "step": 506000 }, { "epoch": 11.76, "learning_rate": 0.000184208720764326, "loss": 0.7553, "step": 508000 }, { "epoch": 11.81, "learning_rate": 0.000183740918794256, "loss": 0.7457, "step": 510000 }, { "epoch": 11.86, "learning_rate": 0.00018327311682418595, "loss": 0.7447, "step": 512000 }, { "epoch": 11.9, "learning_rate": 0.00018280531485411595, "loss": 0.752, "step": 514000 }, { "epoch": 11.95, "learning_rate": 0.0001823375128840459, "loss": 0.7419, "step": 516000 }, { "epoch": 11.99, "learning_rate": 0.00018186971091397588, "loss": 0.7412, "step": 518000 }, { "epoch": 12.04, "learning_rate": 0.00018140190894390583, "loss": 0.7262, "step": 520000 }, { "epoch": 12.09, "learning_rate": 0.00018093410697383582, "loss": 0.7299, "step": 522000 }, { "epoch": 12.13, "learning_rate": 0.00018046630500376576, "loss": 0.7279, "step": 524000 }, { "epoch": 12.18, "learning_rate": 0.00017999850303369576, "loss": 0.7308, "step": 526000 }, { "epoch": 12.23, "learning_rate": 0.0001795307010636257, "loss": 0.7305, "step": 528000 }, { "epoch": 12.27, "learning_rate": 0.0001790628990935557, "loss": 0.7348, "step": 530000 }, { "epoch": 12.32, "learning_rate": 0.0001785950971234857, "loss": 0.7312, "step": 532000 }, { "epoch": 12.37, "learning_rate": 0.00017812729515341563, "loss": 0.7275, "step": 534000 }, { "epoch": 12.41, "learning_rate": 0.00017765949318334563, "loss": 0.7291, "step": 536000 }, { "epoch": 12.46, "learning_rate": 0.00017719169121327557, "loss": 0.7265, "step": 538000 }, { "epoch": 12.5, "learning_rate": 0.00017672388924320557, "loss": 0.7224, "step": 540000 }, { "epoch": 12.55, "learning_rate": 0.0001762560872731355, "loss": 0.7232, "step": 542000 }, { "epoch": 12.6, "learning_rate": 0.0001757882853030655, "loss": 0.7272, "step": 544000 }, { "epoch": 12.64, "learning_rate": 0.00017532048333299545, "loss": 0.7305, "step": 546000 }, { "epoch": 12.69, "learning_rate": 0.00017485268136292544, "loss": 0.7375, "step": 548000 }, { "epoch": 12.74, "learning_rate": 0.00017438487939285539, "loss": 0.7377, "step": 550000 }, { "epoch": 12.78, "learning_rate": 0.00017391707742278538, "loss": 0.7278, "step": 552000 }, { "epoch": 12.83, "learning_rate": 0.00017344927545271532, "loss": 0.7369, "step": 554000 }, { "epoch": 12.87, "learning_rate": 0.00017298147348264532, "loss": 0.7366, "step": 556000 }, { "epoch": 12.92, "learning_rate": 0.00017251367151257526, "loss": 0.736, "step": 558000 }, { "epoch": 12.97, "learning_rate": 0.00017204586954250526, "loss": 0.737, "step": 560000 }, { "epoch": 13.01, "learning_rate": 0.0001715780675724352, "loss": 0.7301, "step": 562000 }, { "epoch": 13.06, "learning_rate": 0.0001711102656023652, "loss": 0.7003, "step": 564000 }, { "epoch": 13.11, "learning_rate": 0.00017064246363229514, "loss": 0.7132, "step": 566000 }, { "epoch": 13.15, "learning_rate": 0.00017017466166222513, "loss": 0.7178, "step": 568000 }, { "epoch": 13.2, "learning_rate": 0.00016970685969215513, "loss": 0.7187, "step": 570000 }, { "epoch": 13.25, "learning_rate": 0.00016923905772208507, "loss": 0.7239, "step": 572000 }, { "epoch": 13.29, "learning_rate": 0.00016877125575201507, "loss": 0.7271, "step": 574000 }, { "epoch": 13.34, "learning_rate": 0.000168303453781945, "loss": 0.7208, "step": 576000 }, { "epoch": 13.38, "learning_rate": 0.000167835651811875, "loss": 0.7199, "step": 578000 }, { "epoch": 13.43, "learning_rate": 0.00016736784984180495, "loss": 0.7094, "step": 580000 }, { "epoch": 13.48, "learning_rate": 0.00016690004787173494, "loss": 0.7114, "step": 582000 }, { "epoch": 13.52, "learning_rate": 0.00016643224590166488, "loss": 0.7196, "step": 584000 }, { "epoch": 13.57, "learning_rate": 0.00016596444393159488, "loss": 0.7222, "step": 586000 }, { "epoch": 13.62, "learning_rate": 0.00016549664196152482, "loss": 0.7345, "step": 588000 }, { "epoch": 13.66, "learning_rate": 0.00016502883999145482, "loss": 0.7208, "step": 590000 }, { "epoch": 13.71, "learning_rate": 0.00016456103802138476, "loss": 0.7298, "step": 592000 }, { "epoch": 13.75, "learning_rate": 0.00016409323605131475, "loss": 0.7324, "step": 594000 }, { "epoch": 13.8, "learning_rate": 0.0001636254340812447, "loss": 0.7243, "step": 596000 }, { "epoch": 13.85, "learning_rate": 0.0001631576321111747, "loss": 0.7215, "step": 598000 }, { "epoch": 13.89, "learning_rate": 0.00016268983014110463, "loss": 0.7246, "step": 600000 }, { "epoch": 13.94, "learning_rate": 0.00016222202817103463, "loss": 0.7219, "step": 602000 }, { "epoch": 13.99, "learning_rate": 0.0001617542262009646, "loss": 0.7248, "step": 604000 }, { "epoch": 14.03, "learning_rate": 0.00016128642423089457, "loss": 0.7139, "step": 606000 }, { "epoch": 14.08, "learning_rate": 0.00016081862226082454, "loss": 0.7026, "step": 608000 }, { "epoch": 14.13, "learning_rate": 0.0001603508202907545, "loss": 0.7107, "step": 610000 }, { "epoch": 14.17, "learning_rate": 0.00015988301832068447, "loss": 0.7037, "step": 612000 }, { "epoch": 14.22, "learning_rate": 0.00015941521635061444, "loss": 0.7145, "step": 614000 }, { "epoch": 14.26, "learning_rate": 0.0001589474143805444, "loss": 0.7181, "step": 616000 }, { "epoch": 14.31, "learning_rate": 0.00015847961241047438, "loss": 0.7026, "step": 618000 }, { "epoch": 14.36, "learning_rate": 0.00015801181044040435, "loss": 0.7142, "step": 620000 }, { "epoch": 14.4, "learning_rate": 0.00015754400847033432, "loss": 0.7087, "step": 622000 }, { "epoch": 14.45, "learning_rate": 0.0001570762065002643, "loss": 0.7109, "step": 624000 }, { "epoch": 14.5, "learning_rate": 0.00015660840453019426, "loss": 0.7031, "step": 626000 }, { "epoch": 14.54, "learning_rate": 0.00015614060256012425, "loss": 0.7101, "step": 628000 }, { "epoch": 14.59, "learning_rate": 0.0001556728005900542, "loss": 0.7152, "step": 630000 }, { "epoch": 14.63, "learning_rate": 0.0001552049986199842, "loss": 0.7147, "step": 632000 }, { "epoch": 14.68, "learning_rate": 0.00015473719664991413, "loss": 0.7144, "step": 634000 }, { "epoch": 14.73, "learning_rate": 0.00015426939467984413, "loss": 0.7113, "step": 636000 }, { "epoch": 14.77, "learning_rate": 0.00015380159270977407, "loss": 0.7071, "step": 638000 }, { "epoch": 14.82, "learning_rate": 0.00015333379073970407, "loss": 0.7118, "step": 640000 }, { "epoch": 14.87, "learning_rate": 0.00015286598876963403, "loss": 0.7098, "step": 642000 }, { "epoch": 14.91, "learning_rate": 0.000152398186799564, "loss": 0.706, "step": 644000 }, { "epoch": 14.96, "learning_rate": 0.00015193038482949397, "loss": 0.709, "step": 646000 }, { "epoch": 15.01, "learning_rate": 0.00015146258285942394, "loss": 0.7087, "step": 648000 }, { "epoch": 15.05, "learning_rate": 0.0001509947808893539, "loss": 0.6983, "step": 650000 }, { "epoch": 15.1, "learning_rate": 0.00015052697891928388, "loss": 0.693, "step": 652000 }, { "epoch": 15.14, "learning_rate": 0.00015005917694921385, "loss": 0.6953, "step": 654000 }, { "epoch": 15.19, "learning_rate": 0.00014959137497914382, "loss": 0.6994, "step": 656000 }, { "epoch": 15.24, "learning_rate": 0.00014912357300907379, "loss": 0.6975, "step": 658000 }, { "epoch": 15.28, "learning_rate": 0.00014865577103900375, "loss": 0.7047, "step": 660000 }, { "epoch": 15.33, "learning_rate": 0.00014818796906893372, "loss": 0.6975, "step": 662000 }, { "epoch": 15.38, "learning_rate": 0.0001477201670988637, "loss": 0.704, "step": 664000 }, { "epoch": 15.42, "learning_rate": 0.00014725236512879366, "loss": 0.7042, "step": 666000 }, { "epoch": 15.47, "learning_rate": 0.00014678456315872363, "loss": 0.6917, "step": 668000 }, { "epoch": 15.51, "learning_rate": 0.0001463167611886536, "loss": 0.6914, "step": 670000 }, { "epoch": 15.56, "learning_rate": 0.00014584895921858357, "loss": 0.7018, "step": 672000 }, { "epoch": 15.61, "learning_rate": 0.00014538115724851354, "loss": 0.7016, "step": 674000 }, { "epoch": 15.65, "learning_rate": 0.0001449133552784435, "loss": 0.7078, "step": 676000 }, { "epoch": 15.7, "learning_rate": 0.00014444555330837347, "loss": 0.6932, "step": 678000 }, { "epoch": 15.75, "learning_rate": 0.00014397775133830344, "loss": 0.6964, "step": 680000 }, { "epoch": 15.79, "learning_rate": 0.0001435099493682334, "loss": 0.6997, "step": 682000 }, { "epoch": 15.84, "learning_rate": 0.00014304214739816338, "loss": 0.7065, "step": 684000 }, { "epoch": 15.88, "learning_rate": 0.00014257434542809335, "loss": 0.7047, "step": 686000 }, { "epoch": 15.93, "learning_rate": 0.00014210654345802332, "loss": 0.7154, "step": 688000 }, { "epoch": 15.98, "learning_rate": 0.0001416387414879533, "loss": 0.6993, "step": 690000 }, { "epoch": 16.02, "learning_rate": 0.00014117093951788326, "loss": 0.6969, "step": 692000 }, { "epoch": 16.07, "learning_rate": 0.00014070313754781325, "loss": 0.689, "step": 694000 }, { "epoch": 16.12, "learning_rate": 0.00014023533557774322, "loss": 0.6888, "step": 696000 }, { "epoch": 16.16, "learning_rate": 0.0001397675336076732, "loss": 0.6818, "step": 698000 }, { "epoch": 16.21, "learning_rate": 0.00013929973163760316, "loss": 0.693, "step": 700000 }, { "epoch": 16.26, "learning_rate": 0.00013883192966753313, "loss": 0.6909, "step": 702000 }, { "epoch": 16.3, "learning_rate": 0.0001383641276974631, "loss": 0.6873, "step": 704000 }, { "epoch": 16.35, "learning_rate": 0.00013789632572739307, "loss": 0.6906, "step": 706000 }, { "epoch": 16.39, "learning_rate": 0.00013742852375732303, "loss": 0.6866, "step": 708000 }, { "epoch": 16.44, "learning_rate": 0.000136960721787253, "loss": 0.701, "step": 710000 }, { "epoch": 16.49, "learning_rate": 0.00013649291981718297, "loss": 0.6937, "step": 712000 }, { "epoch": 16.53, "learning_rate": 0.00013602511784711294, "loss": 0.6907, "step": 714000 }, { "epoch": 16.58, "learning_rate": 0.0001355573158770429, "loss": 0.6897, "step": 716000 }, { "epoch": 16.63, "learning_rate": 0.00013508951390697288, "loss": 0.6952, "step": 718000 }, { "epoch": 16.67, "learning_rate": 0.00013462171193690285, "loss": 0.6865, "step": 720000 }, { "epoch": 16.72, "learning_rate": 0.00013415390996683282, "loss": 0.6935, "step": 722000 }, { "epoch": 16.76, "learning_rate": 0.00013368610799676278, "loss": 0.6919, "step": 724000 }, { "epoch": 16.81, "learning_rate": 0.00013321830602669275, "loss": 0.6904, "step": 726000 }, { "epoch": 16.86, "learning_rate": 0.00013275050405662272, "loss": 0.6964, "step": 728000 }, { "epoch": 16.9, "learning_rate": 0.00013228270208655272, "loss": 0.6943, "step": 730000 }, { "epoch": 16.95, "learning_rate": 0.0001318149001164827, "loss": 0.6949, "step": 732000 }, { "epoch": 17.0, "learning_rate": 0.00013134709814641266, "loss": 0.6943, "step": 734000 }, { "epoch": 17.04, "learning_rate": 0.00013087929617634263, "loss": 0.6851, "step": 736000 }, { "epoch": 17.09, "learning_rate": 0.0001304114942062726, "loss": 0.6802, "step": 738000 }, { "epoch": 17.14, "learning_rate": 0.00012994369223620256, "loss": 0.6801, "step": 740000 }, { "epoch": 17.18, "learning_rate": 0.00012947589026613253, "loss": 0.6756, "step": 742000 }, { "epoch": 17.23, "learning_rate": 0.0001290080882960625, "loss": 0.6824, "step": 744000 }, { "epoch": 17.27, "learning_rate": 0.00012854028632599247, "loss": 0.6894, "step": 746000 }, { "epoch": 17.32, "learning_rate": 0.00012807248435592244, "loss": 0.682, "step": 748000 }, { "epoch": 17.37, "learning_rate": 0.0001276046823858524, "loss": 0.6814, "step": 750000 }, { "epoch": 17.41, "learning_rate": 0.00012713688041578238, "loss": 0.6737, "step": 752000 }, { "epoch": 17.46, "learning_rate": 0.00012666907844571234, "loss": 0.6874, "step": 754000 }, { "epoch": 17.51, "learning_rate": 0.00012620127647564231, "loss": 0.6842, "step": 756000 }, { "epoch": 17.55, "learning_rate": 0.00012573347450557228, "loss": 0.6871, "step": 758000 }, { "epoch": 17.6, "learning_rate": 0.00012526567253550225, "loss": 0.6833, "step": 760000 }, { "epoch": 17.64, "learning_rate": 0.00012479787056543222, "loss": 0.6818, "step": 762000 }, { "epoch": 17.69, "learning_rate": 0.0001243300685953622, "loss": 0.6824, "step": 764000 }, { "epoch": 17.74, "learning_rate": 0.00012386226662529219, "loss": 0.684, "step": 766000 }, { "epoch": 17.78, "learning_rate": 0.00012339446465522215, "loss": 0.6822, "step": 768000 }, { "epoch": 17.83, "learning_rate": 0.00012292666268515212, "loss": 0.68, "step": 770000 }, { "epoch": 17.88, "learning_rate": 0.0001224588607150821, "loss": 0.6842, "step": 772000 }, { "epoch": 17.92, "learning_rate": 0.00012199105874501206, "loss": 0.6827, "step": 774000 }, { "epoch": 17.97, "learning_rate": 0.00012152325677494203, "loss": 0.6901, "step": 776000 }, { "epoch": 18.02, "learning_rate": 0.000121055454804872, "loss": 0.6781, "step": 778000 }, { "epoch": 18.06, "learning_rate": 0.00012058765283480197, "loss": 0.6768, "step": 780000 }, { "epoch": 18.11, "learning_rate": 0.00012011985086473194, "loss": 0.6704, "step": 782000 }, { "epoch": 18.15, "learning_rate": 0.0001196520488946619, "loss": 0.6767, "step": 784000 }, { "epoch": 18.2, "learning_rate": 0.00011918424692459187, "loss": 0.6696, "step": 786000 }, { "epoch": 18.25, "learning_rate": 0.00011871644495452184, "loss": 0.6717, "step": 788000 }, { "epoch": 18.29, "learning_rate": 0.00011824864298445181, "loss": 0.6666, "step": 790000 }, { "epoch": 18.34, "learning_rate": 0.00011778084101438178, "loss": 0.6681, "step": 792000 }, { "epoch": 18.39, "learning_rate": 0.00011731303904431175, "loss": 0.6688, "step": 794000 }, { "epoch": 18.43, "learning_rate": 0.00011684523707424172, "loss": 0.6809, "step": 796000 }, { "epoch": 18.48, "learning_rate": 0.00011637743510417169, "loss": 0.6704, "step": 798000 }, { "epoch": 18.52, "learning_rate": 0.00011590963313410166, "loss": 0.6732, "step": 800000 }, { "epoch": 18.57, "learning_rate": 0.00011544183116403164, "loss": 0.6688, "step": 802000 }, { "epoch": 18.62, "learning_rate": 0.00011497402919396161, "loss": 0.6767, "step": 804000 }, { "epoch": 18.66, "learning_rate": 0.00011450622722389158, "loss": 0.6721, "step": 806000 }, { "epoch": 18.71, "learning_rate": 0.00011403842525382154, "loss": 0.6716, "step": 808000 }, { "epoch": 18.76, "learning_rate": 0.00011357062328375151, "loss": 0.673, "step": 810000 }, { "epoch": 18.8, "learning_rate": 0.00011310282131368148, "loss": 0.6717, "step": 812000 }, { "epoch": 18.85, "learning_rate": 0.00011263501934361145, "loss": 0.6607, "step": 814000 }, { "epoch": 18.9, "learning_rate": 0.00011216721737354142, "loss": 0.6732, "step": 816000 }, { "epoch": 18.94, "learning_rate": 0.00011169941540347139, "loss": 0.6715, "step": 818000 }, { "epoch": 18.99, "learning_rate": 0.00011123161343340136, "loss": 0.678, "step": 820000 }, { "epoch": 19.03, "learning_rate": 0.00011076381146333133, "loss": 0.6618, "step": 822000 }, { "epoch": 19.08, "learning_rate": 0.0001102960094932613, "loss": 0.6589, "step": 824000 }, { "epoch": 19.13, "learning_rate": 0.00010982820752319126, "loss": 0.6624, "step": 826000 }, { "epoch": 19.17, "learning_rate": 0.00010936040555312123, "loss": 0.6618, "step": 828000 }, { "epoch": 19.22, "learning_rate": 0.0001088926035830512, "loss": 0.6666, "step": 830000 }, { "epoch": 19.27, "learning_rate": 0.00010842480161298117, "loss": 0.6645, "step": 832000 }, { "epoch": 19.31, "learning_rate": 0.00010795699964291114, "loss": 0.6667, "step": 834000 }, { "epoch": 19.36, "learning_rate": 0.00010748919767284111, "loss": 0.6649, "step": 836000 }, { "epoch": 19.4, "learning_rate": 0.0001070213957027711, "loss": 0.659, "step": 838000 }, { "epoch": 19.45, "learning_rate": 0.00010655359373270107, "loss": 0.6611, "step": 840000 }, { "epoch": 19.5, "learning_rate": 0.00010608579176263104, "loss": 0.6565, "step": 842000 }, { "epoch": 19.54, "learning_rate": 0.00010561798979256101, "loss": 0.6631, "step": 844000 }, { "epoch": 19.59, "learning_rate": 0.00010515018782249098, "loss": 0.6593, "step": 846000 }, { "epoch": 19.64, "learning_rate": 0.00010468238585242095, "loss": 0.6654, "step": 848000 }, { "epoch": 19.68, "learning_rate": 0.00010421458388235092, "loss": 0.6621, "step": 850000 }, { "epoch": 19.73, "learning_rate": 0.00010374678191228089, "loss": 0.661, "step": 852000 }, { "epoch": 19.78, "learning_rate": 0.00010327897994221086, "loss": 0.6515, "step": 854000 }, { "epoch": 19.82, "learning_rate": 0.00010281117797214082, "loss": 0.6614, "step": 856000 }, { "epoch": 19.87, "learning_rate": 0.0001023433760020708, "loss": 0.6616, "step": 858000 }, { "epoch": 19.91, "learning_rate": 0.00010187557403200076, "loss": 0.6598, "step": 860000 }, { "epoch": 19.96, "learning_rate": 0.00010140777206193073, "loss": 0.6616, "step": 862000 }, { "epoch": 20.01, "learning_rate": 0.0001009399700918607, "loss": 0.6679, "step": 864000 }, { "epoch": 20.05, "learning_rate": 0.00010047216812179067, "loss": 0.6518, "step": 866000 }, { "epoch": 20.1, "learning_rate": 0.00010000436615172064, "loss": 0.6463, "step": 868000 }, { "epoch": 20.15, "learning_rate": 9.95365641816506e-05, "loss": 0.6529, "step": 870000 }, { "epoch": 20.19, "learning_rate": 9.906876221158058e-05, "loss": 0.6463, "step": 872000 }, { "epoch": 20.24, "learning_rate": 9.860096024151056e-05, "loss": 0.6545, "step": 874000 }, { "epoch": 20.28, "learning_rate": 9.813315827144053e-05, "loss": 0.6531, "step": 876000 }, { "epoch": 20.33, "learning_rate": 9.76653563013705e-05, "loss": 0.6442, "step": 878000 }, { "epoch": 20.38, "learning_rate": 9.719755433130046e-05, "loss": 0.65, "step": 880000 }, { "epoch": 20.42, "learning_rate": 9.672975236123043e-05, "loss": 0.6518, "step": 882000 }, { "epoch": 20.47, "learning_rate": 9.62619503911604e-05, "loss": 0.6546, "step": 884000 }, { "epoch": 20.52, "learning_rate": 9.579414842109037e-05, "loss": 0.6494, "step": 886000 }, { "epoch": 20.56, "learning_rate": 9.532634645102035e-05, "loss": 0.654, "step": 888000 }, { "epoch": 20.61, "learning_rate": 9.485854448095032e-05, "loss": 0.6536, "step": 890000 }, { "epoch": 20.66, "learning_rate": 9.439074251088029e-05, "loss": 0.6547, "step": 892000 }, { "epoch": 20.7, "learning_rate": 9.392294054081026e-05, "loss": 0.6421, "step": 894000 }, { "epoch": 20.75, "learning_rate": 9.345513857074023e-05, "loss": 0.6506, "step": 896000 }, { "epoch": 20.79, "learning_rate": 9.29873366006702e-05, "loss": 0.6551, "step": 898000 }, { "epoch": 20.84, "learning_rate": 9.251953463060017e-05, "loss": 0.6542, "step": 900000 }, { "epoch": 20.89, "learning_rate": 9.205173266053014e-05, "loss": 0.6398, "step": 902000 }, { "epoch": 20.93, "learning_rate": 9.15839306904601e-05, "loss": 0.653, "step": 904000 }, { "epoch": 20.98, "learning_rate": 9.111612872039007e-05, "loss": 0.6476, "step": 906000 }, { "epoch": 21.03, "learning_rate": 9.064832675032004e-05, "loss": 0.6378, "step": 908000 }, { "epoch": 21.07, "learning_rate": 9.018052478025002e-05, "loss": 0.6413, "step": 910000 }, { "epoch": 21.12, "learning_rate": 8.971272281017999e-05, "loss": 0.6368, "step": 912000 }, { "epoch": 21.16, "learning_rate": 8.924492084010996e-05, "loss": 0.6366, "step": 914000 }, { "epoch": 21.21, "learning_rate": 8.877711887003993e-05, "loss": 0.6455, "step": 916000 }, { "epoch": 21.26, "learning_rate": 8.83093168999699e-05, "loss": 0.6448, "step": 918000 }, { "epoch": 21.3, "learning_rate": 8.784151492989987e-05, "loss": 0.6371, "step": 920000 }, { "epoch": 21.35, "learning_rate": 8.737371295982984e-05, "loss": 0.6457, "step": 922000 }, { "epoch": 21.4, "learning_rate": 8.69059109897598e-05, "loss": 0.6399, "step": 924000 }, { "epoch": 21.44, "learning_rate": 8.643810901968978e-05, "loss": 0.6389, "step": 926000 }, { "epoch": 21.49, "learning_rate": 8.597030704961974e-05, "loss": 0.6444, "step": 928000 }, { "epoch": 21.54, "learning_rate": 8.550250507954971e-05, "loss": 0.6346, "step": 930000 }, { "epoch": 21.58, "learning_rate": 8.503470310947968e-05, "loss": 0.6394, "step": 932000 }, { "epoch": 21.63, "learning_rate": 8.456690113940965e-05, "loss": 0.6397, "step": 934000 }, { "epoch": 21.67, "learning_rate": 8.409909916933962e-05, "loss": 0.6411, "step": 936000 }, { "epoch": 21.72, "learning_rate": 8.363129719926959e-05, "loss": 0.6383, "step": 938000 }, { "epoch": 21.77, "learning_rate": 8.316349522919956e-05, "loss": 0.6416, "step": 940000 }, { "epoch": 21.81, "learning_rate": 8.269569325912953e-05, "loss": 0.635, "step": 942000 }, { "epoch": 21.86, "learning_rate": 8.22278912890595e-05, "loss": 0.6371, "step": 944000 }, { "epoch": 21.91, "learning_rate": 8.176008931898949e-05, "loss": 0.6412, "step": 946000 }, { "epoch": 21.95, "learning_rate": 8.129228734891946e-05, "loss": 0.6414, "step": 948000 }, { "epoch": 22.0, "learning_rate": 8.082448537884943e-05, "loss": 0.6285, "step": 950000 }, { "epoch": 22.04, "learning_rate": 8.03566834087794e-05, "loss": 0.6285, "step": 952000 }, { "epoch": 22.09, "learning_rate": 7.988888143870937e-05, "loss": 0.6268, "step": 954000 }, { "epoch": 22.14, "learning_rate": 7.942107946863934e-05, "loss": 0.6251, "step": 956000 }, { "epoch": 22.18, "learning_rate": 7.89532774985693e-05, "loss": 0.6306, "step": 958000 }, { "epoch": 22.23, "learning_rate": 7.848547552849927e-05, "loss": 0.6283, "step": 960000 }, { "epoch": 22.28, "learning_rate": 7.801767355842924e-05, "loss": 0.6264, "step": 962000 }, { "epoch": 22.32, "learning_rate": 7.754987158835921e-05, "loss": 0.6279, "step": 964000 }, { "epoch": 22.37, "learning_rate": 7.708206961828918e-05, "loss": 0.6272, "step": 966000 }, { "epoch": 22.41, "learning_rate": 7.661426764821915e-05, "loss": 0.6355, "step": 968000 }, { "epoch": 22.46, "learning_rate": 7.614646567814912e-05, "loss": 0.6349, "step": 970000 }, { "epoch": 22.51, "learning_rate": 7.567866370807909e-05, "loss": 0.6281, "step": 972000 }, { "epoch": 22.55, "learning_rate": 7.521086173800905e-05, "loss": 0.6269, "step": 974000 }, { "epoch": 22.6, "learning_rate": 7.474305976793904e-05, "loss": 0.6221, "step": 976000 }, { "epoch": 22.65, "learning_rate": 7.4275257797869e-05, "loss": 0.6295, "step": 978000 }, { "epoch": 22.69, "learning_rate": 7.380745582779897e-05, "loss": 0.6265, "step": 980000 }, { "epoch": 22.74, "learning_rate": 7.333965385772894e-05, "loss": 0.6203, "step": 982000 }, { "epoch": 22.79, "learning_rate": 7.287185188765891e-05, "loss": 0.6306, "step": 984000 }, { "epoch": 22.83, "learning_rate": 7.240404991758888e-05, "loss": 0.6319, "step": 986000 }, { "epoch": 22.88, "learning_rate": 7.193624794751885e-05, "loss": 0.6211, "step": 988000 }, { "epoch": 22.92, "learning_rate": 7.146844597744882e-05, "loss": 0.6244, "step": 990000 }, { "epoch": 22.97, "learning_rate": 7.100064400737879e-05, "loss": 0.6262, "step": 992000 }, { "epoch": 23.02, "learning_rate": 7.053284203730876e-05, "loss": 0.6166, "step": 994000 }, { "epoch": 23.06, "learning_rate": 7.006504006723873e-05, "loss": 0.6166, "step": 996000 }, { "epoch": 23.11, "learning_rate": 6.95972380971687e-05, "loss": 0.6175, "step": 998000 }, { "epoch": 23.16, "learning_rate": 6.912943612709866e-05, "loss": 0.6151, "step": 1000000 }, { "epoch": 23.2, "learning_rate": 6.866163415702863e-05, "loss": 0.6153, "step": 1002000 }, { "epoch": 23.25, "learning_rate": 6.81938321869586e-05, "loss": 0.6212, "step": 1004000 }, { "epoch": 23.29, "learning_rate": 6.772603021688858e-05, "loss": 0.6161, "step": 1006000 }, { "epoch": 23.34, "learning_rate": 6.725822824681855e-05, "loss": 0.6158, "step": 1008000 }, { "epoch": 23.39, "learning_rate": 6.679042627674852e-05, "loss": 0.6089, "step": 1010000 }, { "epoch": 23.43, "learning_rate": 6.632262430667849e-05, "loss": 0.6166, "step": 1012000 }, { "epoch": 23.48, "learning_rate": 6.585482233660846e-05, "loss": 0.6134, "step": 1014000 }, { "epoch": 23.53, "learning_rate": 6.538702036653843e-05, "loss": 0.6171, "step": 1016000 }, { "epoch": 23.57, "learning_rate": 6.49192183964684e-05, "loss": 0.6122, "step": 1018000 }, { "epoch": 23.62, "learning_rate": 6.445141642639837e-05, "loss": 0.6176, "step": 1020000 }, { "epoch": 23.67, "learning_rate": 6.398361445632833e-05, "loss": 0.6146, "step": 1022000 }, { "epoch": 23.71, "learning_rate": 6.35158124862583e-05, "loss": 0.6069, "step": 1024000 }, { "epoch": 23.76, "learning_rate": 6.304801051618829e-05, "loss": 0.6169, "step": 1026000 }, { "epoch": 23.8, "learning_rate": 6.258020854611825e-05, "loss": 0.6222, "step": 1028000 }, { "epoch": 23.85, "learning_rate": 6.211240657604822e-05, "loss": 0.6152, "step": 1030000 }, { "epoch": 23.9, "learning_rate": 6.164460460597819e-05, "loss": 0.6181, "step": 1032000 }, { "epoch": 23.94, "learning_rate": 6.117680263590816e-05, "loss": 0.6123, "step": 1034000 }, { "epoch": 23.99, "learning_rate": 6.070900066583813e-05, "loss": 0.619, "step": 1036000 }, { "epoch": 24.04, "learning_rate": 6.02411986957681e-05, "loss": 0.6099, "step": 1038000 }, { "epoch": 24.08, "learning_rate": 5.977339672569807e-05, "loss": 0.6098, "step": 1040000 }, { "epoch": 24.13, "learning_rate": 5.930559475562804e-05, "loss": 0.5965, "step": 1042000 }, { "epoch": 24.17, "learning_rate": 5.883779278555802e-05, "loss": 0.6059, "step": 1044000 }, { "epoch": 24.22, "learning_rate": 5.836999081548799e-05, "loss": 0.6021, "step": 1046000 }, { "epoch": 24.27, "learning_rate": 5.790218884541796e-05, "loss": 0.6093, "step": 1048000 }, { "epoch": 24.31, "learning_rate": 5.7434386875347926e-05, "loss": 0.6031, "step": 1050000 }, { "epoch": 24.36, "learning_rate": 5.6966584905277895e-05, "loss": 0.6053, "step": 1052000 }, { "epoch": 24.41, "learning_rate": 5.6498782935207863e-05, "loss": 0.6036, "step": 1054000 }, { "epoch": 24.45, "learning_rate": 5.603098096513783e-05, "loss": 0.6011, "step": 1056000 }, { "epoch": 24.5, "learning_rate": 5.55631789950678e-05, "loss": 0.6035, "step": 1058000 }, { "epoch": 24.55, "learning_rate": 5.509537702499777e-05, "loss": 0.6066, "step": 1060000 }, { "epoch": 24.59, "learning_rate": 5.4627575054927746e-05, "loss": 0.6061, "step": 1062000 }, { "epoch": 24.64, "learning_rate": 5.4159773084857714e-05, "loss": 0.6027, "step": 1064000 }, { "epoch": 24.68, "learning_rate": 5.369197111478768e-05, "loss": 0.6, "step": 1066000 }, { "epoch": 24.73, "learning_rate": 5.322416914471765e-05, "loss": 0.6062, "step": 1068000 }, { "epoch": 24.78, "learning_rate": 5.275636717464762e-05, "loss": 0.6003, "step": 1070000 }, { "epoch": 24.82, "learning_rate": 5.228856520457759e-05, "loss": 0.5988, "step": 1072000 }, { "epoch": 24.87, "learning_rate": 5.182076323450756e-05, "loss": 0.6096, "step": 1074000 }, { "epoch": 24.92, "learning_rate": 5.135296126443753e-05, "loss": 0.5988, "step": 1076000 }, { "epoch": 24.96, "learning_rate": 5.08851592943675e-05, "loss": 0.6086, "step": 1078000 }, { "epoch": 25.01, "learning_rate": 5.041735732429748e-05, "loss": 0.5942, "step": 1080000 }, { "epoch": 25.05, "learning_rate": 4.994955535422745e-05, "loss": 0.5954, "step": 1082000 }, { "epoch": 25.1, "learning_rate": 4.9481753384157417e-05, "loss": 0.5948, "step": 1084000 }, { "epoch": 25.15, "learning_rate": 4.9013951414087385e-05, "loss": 0.5946, "step": 1086000 }, { "epoch": 25.19, "learning_rate": 4.8546149444017354e-05, "loss": 0.5938, "step": 1088000 }, { "epoch": 25.24, "learning_rate": 4.807834747394732e-05, "loss": 0.5961, "step": 1090000 }, { "epoch": 25.29, "learning_rate": 4.761054550387729e-05, "loss": 0.5947, "step": 1092000 }, { "epoch": 25.33, "learning_rate": 4.714274353380726e-05, "loss": 0.6019, "step": 1094000 }, { "epoch": 25.38, "learning_rate": 4.667494156373723e-05, "loss": 0.5927, "step": 1096000 }, { "epoch": 25.43, "learning_rate": 4.6207139593667205e-05, "loss": 0.5921, "step": 1098000 }, { "epoch": 25.47, "learning_rate": 4.5739337623597174e-05, "loss": 0.5954, "step": 1100000 }, { "epoch": 25.52, "learning_rate": 4.527153565352715e-05, "loss": 0.5926, "step": 1102000 }, { "epoch": 25.56, "learning_rate": 4.480373368345712e-05, "loss": 0.5963, "step": 1104000 }, { "epoch": 25.61, "learning_rate": 4.433593171338709e-05, "loss": 0.5902, "step": 1106000 }, { "epoch": 25.66, "learning_rate": 4.3868129743317056e-05, "loss": 0.5952, "step": 1108000 }, { "epoch": 25.7, "learning_rate": 4.3400327773247025e-05, "loss": 0.5878, "step": 1110000 }, { "epoch": 25.75, "learning_rate": 4.2932525803176994e-05, "loss": 0.5926, "step": 1112000 }, { "epoch": 25.8, "learning_rate": 4.246472383310696e-05, "loss": 0.5854, "step": 1114000 }, { "epoch": 25.84, "learning_rate": 4.199692186303694e-05, "loss": 0.5916, "step": 1116000 }, { "epoch": 25.89, "learning_rate": 4.152911989296691e-05, "loss": 0.5869, "step": 1118000 }, { "epoch": 25.93, "learning_rate": 4.1061317922896876e-05, "loss": 0.5913, "step": 1120000 }, { "epoch": 25.98, "learning_rate": 4.0593515952826845e-05, "loss": 0.5822, "step": 1122000 }, { "epoch": 26.03, "learning_rate": 4.0125713982756814e-05, "loss": 0.5831, "step": 1124000 }, { "epoch": 26.07, "learning_rate": 3.965791201268678e-05, "loss": 0.5847, "step": 1126000 }, { "epoch": 26.12, "learning_rate": 3.919011004261675e-05, "loss": 0.5828, "step": 1128000 }, { "epoch": 26.17, "learning_rate": 3.872230807254672e-05, "loss": 0.5825, "step": 1130000 }, { "epoch": 26.21, "learning_rate": 3.825450610247669e-05, "loss": 0.5848, "step": 1132000 }, { "epoch": 26.26, "learning_rate": 3.778670413240667e-05, "loss": 0.5866, "step": 1134000 }, { "epoch": 26.31, "learning_rate": 3.7318902162336634e-05, "loss": 0.5832, "step": 1136000 }, { "epoch": 26.35, "learning_rate": 3.685110019226661e-05, "loss": 0.58, "step": 1138000 }, { "epoch": 26.4, "learning_rate": 3.638329822219658e-05, "loss": 0.5767, "step": 1140000 }, { "epoch": 26.44, "learning_rate": 3.591549625212655e-05, "loss": 0.5792, "step": 1142000 }, { "epoch": 26.49, "learning_rate": 3.5447694282056516e-05, "loss": 0.5764, "step": 1144000 }, { "epoch": 26.54, "learning_rate": 3.4979892311986485e-05, "loss": 0.5794, "step": 1146000 }, { "epoch": 26.58, "learning_rate": 3.451209034191646e-05, "loss": 0.5738, "step": 1148000 }, { "epoch": 26.63, "learning_rate": 3.404428837184643e-05, "loss": 0.5822, "step": 1150000 }, { "epoch": 26.68, "learning_rate": 3.35764864017764e-05, "loss": 0.5734, "step": 1152000 }, { "epoch": 26.72, "learning_rate": 3.310868443170637e-05, "loss": 0.5794, "step": 1154000 }, { "epoch": 26.77, "learning_rate": 3.2640882461636336e-05, "loss": 0.5853, "step": 1156000 }, { "epoch": 26.81, "learning_rate": 3.2173080491566305e-05, "loss": 0.5842, "step": 1158000 }, { "epoch": 26.86, "learning_rate": 3.170527852149628e-05, "loss": 0.5847, "step": 1160000 }, { "epoch": 26.91, "learning_rate": 3.123747655142625e-05, "loss": 0.5786, "step": 1162000 }, { "epoch": 26.95, "learning_rate": 3.076967458135622e-05, "loss": 0.5818, "step": 1164000 }, { "epoch": 27.0, "learning_rate": 3.030187261128619e-05, "loss": 0.5722, "step": 1166000 }, { "epoch": 27.05, "learning_rate": 2.983407064121616e-05, "loss": 0.5726, "step": 1168000 }, { "epoch": 27.09, "learning_rate": 2.936626867114613e-05, "loss": 0.5745, "step": 1170000 }, { "epoch": 27.14, "learning_rate": 2.8898466701076097e-05, "loss": 0.5655, "step": 1172000 }, { "epoch": 27.19, "learning_rate": 2.843066473100607e-05, "loss": 0.5747, "step": 1174000 }, { "epoch": 27.23, "learning_rate": 2.7962862760936038e-05, "loss": 0.5734, "step": 1176000 }, { "epoch": 27.28, "learning_rate": 2.7495060790866007e-05, "loss": 0.5752, "step": 1178000 }, { "epoch": 27.32, "learning_rate": 2.7027258820795976e-05, "loss": 0.5784, "step": 1180000 }, { "epoch": 27.37, "learning_rate": 2.6559456850725945e-05, "loss": 0.5667, "step": 1182000 }, { "epoch": 27.42, "learning_rate": 2.609165488065592e-05, "loss": 0.5748, "step": 1184000 }, { "epoch": 27.46, "learning_rate": 2.562385291058589e-05, "loss": 0.5762, "step": 1186000 }, { "epoch": 27.51, "learning_rate": 2.5156050940515858e-05, "loss": 0.5783, "step": 1188000 }, { "epoch": 27.56, "learning_rate": 2.4688248970445827e-05, "loss": 0.5668, "step": 1190000 }, { "epoch": 27.6, "learning_rate": 2.42204470003758e-05, "loss": 0.5671, "step": 1192000 }, { "epoch": 27.65, "learning_rate": 2.3752645030305768e-05, "loss": 0.5688, "step": 1194000 }, { "epoch": 27.69, "learning_rate": 2.328484306023574e-05, "loss": 0.5643, "step": 1196000 }, { "epoch": 27.74, "learning_rate": 2.281704109016571e-05, "loss": 0.5688, "step": 1198000 }, { "epoch": 27.79, "learning_rate": 2.2349239120095678e-05, "loss": 0.5651, "step": 1200000 }, { "epoch": 27.83, "learning_rate": 2.188143715002565e-05, "loss": 0.5705, "step": 1202000 }, { "epoch": 27.88, "learning_rate": 2.141363517995562e-05, "loss": 0.5684, "step": 1204000 }, { "epoch": 27.93, "learning_rate": 2.0945833209885588e-05, "loss": 0.567, "step": 1206000 }, { "epoch": 27.97, "learning_rate": 2.0478031239815557e-05, "loss": 0.5711, "step": 1208000 }, { "epoch": 28.02, "learning_rate": 2.0010229269745533e-05, "loss": 0.5684, "step": 1210000 }, { "epoch": 28.06, "learning_rate": 1.95424272996755e-05, "loss": 0.5604, "step": 1212000 }, { "epoch": 28.11, "learning_rate": 1.907462532960547e-05, "loss": 0.5615, "step": 1214000 }, { "epoch": 28.16, "learning_rate": 1.860682335953544e-05, "loss": 0.5679, "step": 1216000 }, { "epoch": 28.2, "learning_rate": 1.813902138946541e-05, "loss": 0.5644, "step": 1218000 }, { "epoch": 28.25, "learning_rate": 1.767121941939538e-05, "loss": 0.5663, "step": 1220000 }, { "epoch": 28.3, "learning_rate": 1.720341744932535e-05, "loss": 0.5584, "step": 1222000 }, { "epoch": 28.34, "learning_rate": 1.6735615479255318e-05, "loss": 0.558, "step": 1224000 }, { "epoch": 28.39, "learning_rate": 1.626781350918529e-05, "loss": 0.5575, "step": 1226000 }, { "epoch": 28.44, "learning_rate": 1.580001153911526e-05, "loss": 0.5728, "step": 1228000 }, { "epoch": 28.48, "learning_rate": 1.533220956904523e-05, "loss": 0.5653, "step": 1230000 }, { "epoch": 28.53, "learning_rate": 1.48644075989752e-05, "loss": 0.5603, "step": 1232000 }, { "epoch": 28.57, "learning_rate": 1.4396605628905172e-05, "loss": 0.5613, "step": 1234000 }, { "epoch": 28.62, "learning_rate": 1.3928803658835141e-05, "loss": 0.5563, "step": 1236000 }, { "epoch": 28.67, "learning_rate": 1.346100168876511e-05, "loss": 0.5705, "step": 1238000 }, { "epoch": 28.71, "learning_rate": 1.299319971869508e-05, "loss": 0.5568, "step": 1240000 }, { "epoch": 28.76, "learning_rate": 1.252539774862505e-05, "loss": 0.5517, "step": 1242000 }, { "epoch": 28.81, "learning_rate": 1.2057595778555022e-05, "loss": 0.5647, "step": 1244000 }, { "epoch": 28.85, "learning_rate": 1.158979380848499e-05, "loss": 0.5551, "step": 1246000 }, { "epoch": 28.9, "learning_rate": 1.1121991838414961e-05, "loss": 0.5598, "step": 1248000 }, { "epoch": 28.94, "learning_rate": 1.0654189868344932e-05, "loss": 0.562, "step": 1250000 }, { "epoch": 28.99, "learning_rate": 1.0186387898274902e-05, "loss": 0.5563, "step": 1252000 }, { "epoch": 29.04, "learning_rate": 9.718585928204871e-06, "loss": 0.5606, "step": 1254000 }, { "epoch": 29.08, "learning_rate": 9.250783958134842e-06, "loss": 0.5509, "step": 1256000 }, { "epoch": 29.13, "learning_rate": 8.782981988064812e-06, "loss": 0.551, "step": 1258000 }, { "epoch": 29.18, "learning_rate": 8.315180017994783e-06, "loss": 0.5548, "step": 1260000 }, { "epoch": 29.22, "learning_rate": 7.847378047924752e-06, "loss": 0.5562, "step": 1262000 }, { "epoch": 29.27, "learning_rate": 7.3795760778547214e-06, "loss": 0.5563, "step": 1264000 }, { "epoch": 29.32, "learning_rate": 6.911774107784692e-06, "loss": 0.5551, "step": 1266000 }, { "epoch": 29.36, "learning_rate": 6.443972137714662e-06, "loss": 0.555, "step": 1268000 }, { "epoch": 29.41, "learning_rate": 5.976170167644632e-06, "loss": 0.554, "step": 1270000 }, { "epoch": 29.45, "learning_rate": 5.508368197574602e-06, "loss": 0.5522, "step": 1272000 }, { "epoch": 29.5, "learning_rate": 5.0405662275045725e-06, "loss": 0.5522, "step": 1274000 }, { "epoch": 29.55, "learning_rate": 4.572764257434542e-06, "loss": 0.5601, "step": 1276000 }, { "epoch": 29.59, "learning_rate": 4.104962287364513e-06, "loss": 0.5578, "step": 1278000 }, { "epoch": 29.64, "learning_rate": 3.6371603172944825e-06, "loss": 0.5602, "step": 1280000 }, { "epoch": 29.69, "learning_rate": 3.1693583472244526e-06, "loss": 0.5517, "step": 1282000 }, { "epoch": 29.73, "learning_rate": 2.701556377154423e-06, "loss": 0.5538, "step": 1284000 }, { "epoch": 29.78, "learning_rate": 2.233754407084393e-06, "loss": 0.549, "step": 1286000 }, { "epoch": 29.82, "learning_rate": 1.765952437014363e-06, "loss": 0.5595, "step": 1288000 }, { "epoch": 29.87, "learning_rate": 1.2981504669443331e-06, "loss": 0.5546, "step": 1290000 }, { "epoch": 29.92, "learning_rate": 8.303484968743031e-07, "loss": 0.5547, "step": 1292000 }, { "epoch": 29.96, "learning_rate": 3.6254652680427316e-07, "loss": 0.5494, "step": 1294000 }, { "epoch": 30.0, "step": 1295550, "total_flos": 2.6480449905256835e+21, "train_loss": 0.7350748663054241, "train_runtime": 658563.1153, "train_samples_per_second": 31.476, "train_steps_per_second": 1.967 } ], "logging_steps": 2000, "max_steps": 1295550, "num_train_epochs": 30, "save_steps": 500, "total_flos": 2.6480449905256835e+21, "trial_name": null, "trial_params": null }