yesj1234's picture
Upload folder using huggingface_hub
d923e5b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 1273800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 0.0003,
"loss": 3.0982,
"step": 2000
},
{
"epoch": 0.09,
"learning_rate": 0.0002995282277087592,
"loss": 1.1999,
"step": 4000
},
{
"epoch": 0.14,
"learning_rate": 0.00029905645541751844,
"loss": 1.0995,
"step": 6000
},
{
"epoch": 0.19,
"learning_rate": 0.0002985846831262777,
"loss": 1.0716,
"step": 8000
},
{
"epoch": 0.24,
"learning_rate": 0.0002981129108350369,
"loss": 1.0664,
"step": 10000
},
{
"epoch": 0.28,
"learning_rate": 0.0002976411385437962,
"loss": 1.0223,
"step": 12000
},
{
"epoch": 0.33,
"learning_rate": 0.00029716936625255537,
"loss": 0.9955,
"step": 14000
},
{
"epoch": 0.38,
"learning_rate": 0.00029669759396131466,
"loss": 0.9925,
"step": 16000
},
{
"epoch": 0.42,
"learning_rate": 0.0002962258216700739,
"loss": 0.9951,
"step": 18000
},
{
"epoch": 0.47,
"learning_rate": 0.0002957540493788331,
"loss": 0.9811,
"step": 20000
},
{
"epoch": 0.52,
"learning_rate": 0.00029528227708759236,
"loss": 0.9727,
"step": 22000
},
{
"epoch": 0.57,
"learning_rate": 0.0002948105047963516,
"loss": 0.9655,
"step": 24000
},
{
"epoch": 0.61,
"learning_rate": 0.0002943387325051109,
"loss": 0.959,
"step": 26000
},
{
"epoch": 0.66,
"learning_rate": 0.00029386696021387005,
"loss": 0.9591,
"step": 28000
},
{
"epoch": 0.71,
"learning_rate": 0.00029339518792262934,
"loss": 0.9455,
"step": 30000
},
{
"epoch": 0.75,
"learning_rate": 0.0002929234156313886,
"loss": 0.9425,
"step": 32000
},
{
"epoch": 0.8,
"learning_rate": 0.0002924516433401478,
"loss": 0.9373,
"step": 34000
},
{
"epoch": 0.85,
"learning_rate": 0.00029197987104890704,
"loss": 0.9341,
"step": 36000
},
{
"epoch": 0.89,
"learning_rate": 0.00029150809875766627,
"loss": 0.93,
"step": 38000
},
{
"epoch": 0.94,
"learning_rate": 0.0002910363264664255,
"loss": 0.9184,
"step": 40000
},
{
"epoch": 0.99,
"learning_rate": 0.00029056455417518474,
"loss": 0.9222,
"step": 42000
},
{
"epoch": 1.04,
"learning_rate": 0.000290092781883944,
"loss": 0.8941,
"step": 44000
},
{
"epoch": 1.08,
"learning_rate": 0.0002896210095927032,
"loss": 0.8868,
"step": 46000
},
{
"epoch": 1.13,
"learning_rate": 0.0002891492373014625,
"loss": 0.8894,
"step": 48000
},
{
"epoch": 1.18,
"learning_rate": 0.0002886774650102217,
"loss": 0.8799,
"step": 50000
},
{
"epoch": 1.22,
"learning_rate": 0.00028820569271898095,
"loss": 0.8781,
"step": 52000
},
{
"epoch": 1.27,
"learning_rate": 0.0002877339204277402,
"loss": 0.8853,
"step": 54000
},
{
"epoch": 1.32,
"learning_rate": 0.0002872621481364994,
"loss": 0.8829,
"step": 56000
},
{
"epoch": 1.37,
"learning_rate": 0.00028679037584525865,
"loss": 0.8747,
"step": 58000
},
{
"epoch": 1.41,
"learning_rate": 0.0002863186035540179,
"loss": 0.8806,
"step": 60000
},
{
"epoch": 1.46,
"learning_rate": 0.00028584683126277717,
"loss": 0.8868,
"step": 62000
},
{
"epoch": 1.51,
"learning_rate": 0.00028537505897153635,
"loss": 0.8806,
"step": 64000
},
{
"epoch": 1.55,
"learning_rate": 0.00028490328668029564,
"loss": 0.8771,
"step": 66000
},
{
"epoch": 1.6,
"learning_rate": 0.00028443151438905487,
"loss": 0.8648,
"step": 68000
},
{
"epoch": 1.65,
"learning_rate": 0.0002839597420978141,
"loss": 0.8747,
"step": 70000
},
{
"epoch": 1.7,
"learning_rate": 0.00028348796980657334,
"loss": 0.8719,
"step": 72000
},
{
"epoch": 1.74,
"learning_rate": 0.00028301619751533257,
"loss": 0.8641,
"step": 74000
},
{
"epoch": 1.79,
"learning_rate": 0.0002825444252240918,
"loss": 0.8555,
"step": 76000
},
{
"epoch": 1.84,
"learning_rate": 0.00028207265293285103,
"loss": 0.8583,
"step": 78000
},
{
"epoch": 1.88,
"learning_rate": 0.0002816008806416103,
"loss": 0.8624,
"step": 80000
},
{
"epoch": 1.93,
"learning_rate": 0.0002811291083503695,
"loss": 0.8578,
"step": 82000
},
{
"epoch": 1.98,
"learning_rate": 0.0002806573360591288,
"loss": 0.8474,
"step": 84000
},
{
"epoch": 2.03,
"learning_rate": 0.000280185563767888,
"loss": 0.8366,
"step": 86000
},
{
"epoch": 2.07,
"learning_rate": 0.00027971379147664725,
"loss": 0.8226,
"step": 88000
},
{
"epoch": 2.12,
"learning_rate": 0.0002792420191854065,
"loss": 0.8267,
"step": 90000
},
{
"epoch": 2.17,
"learning_rate": 0.0002787702468941657,
"loss": 0.8231,
"step": 92000
},
{
"epoch": 2.21,
"learning_rate": 0.00027829847460292495,
"loss": 0.8284,
"step": 94000
},
{
"epoch": 2.26,
"learning_rate": 0.0002778267023116842,
"loss": 0.8153,
"step": 96000
},
{
"epoch": 2.31,
"learning_rate": 0.00027735493002044347,
"loss": 0.8241,
"step": 98000
},
{
"epoch": 2.36,
"learning_rate": 0.00027688315772920265,
"loss": 0.8246,
"step": 100000
},
{
"epoch": 2.4,
"learning_rate": 0.00027641138543796194,
"loss": 0.8219,
"step": 102000
},
{
"epoch": 2.45,
"learning_rate": 0.00027593961314672117,
"loss": 0.8244,
"step": 104000
},
{
"epoch": 2.5,
"learning_rate": 0.0002754678408554804,
"loss": 0.8201,
"step": 106000
},
{
"epoch": 2.54,
"learning_rate": 0.00027499606856423963,
"loss": 0.8193,
"step": 108000
},
{
"epoch": 2.59,
"learning_rate": 0.00027452429627299887,
"loss": 0.8225,
"step": 110000
},
{
"epoch": 2.64,
"learning_rate": 0.0002740525239817581,
"loss": 0.8105,
"step": 112000
},
{
"epoch": 2.68,
"learning_rate": 0.00027358075169051733,
"loss": 0.82,
"step": 114000
},
{
"epoch": 2.73,
"learning_rate": 0.0002731089793992766,
"loss": 0.8191,
"step": 116000
},
{
"epoch": 2.78,
"learning_rate": 0.00027263720710803585,
"loss": 0.8102,
"step": 118000
},
{
"epoch": 2.83,
"learning_rate": 0.0002721654348167951,
"loss": 0.8189,
"step": 120000
},
{
"epoch": 2.87,
"learning_rate": 0.0002716936625255543,
"loss": 0.8128,
"step": 122000
},
{
"epoch": 2.92,
"learning_rate": 0.00027122189023431355,
"loss": 0.815,
"step": 124000
},
{
"epoch": 2.97,
"learning_rate": 0.0002707501179430728,
"loss": 0.8123,
"step": 126000
},
{
"epoch": 3.01,
"learning_rate": 0.000270278345651832,
"loss": 0.7986,
"step": 128000
},
{
"epoch": 3.06,
"learning_rate": 0.00026980657336059125,
"loss": 0.7701,
"step": 130000
},
{
"epoch": 3.11,
"learning_rate": 0.0002693348010693505,
"loss": 0.7819,
"step": 132000
},
{
"epoch": 3.16,
"learning_rate": 0.00026886302877810977,
"loss": 0.7826,
"step": 134000
},
{
"epoch": 3.2,
"learning_rate": 0.000268391256486869,
"loss": 0.7868,
"step": 136000
},
{
"epoch": 3.25,
"learning_rate": 0.00026791948419562823,
"loss": 0.7843,
"step": 138000
},
{
"epoch": 3.3,
"learning_rate": 0.00026744771190438747,
"loss": 0.7841,
"step": 140000
},
{
"epoch": 3.34,
"learning_rate": 0.0002669759396131467,
"loss": 0.7845,
"step": 142000
},
{
"epoch": 3.39,
"learning_rate": 0.00026650416732190593,
"loss": 0.7859,
"step": 144000
},
{
"epoch": 3.44,
"learning_rate": 0.00026603239503066516,
"loss": 0.7746,
"step": 146000
},
{
"epoch": 3.49,
"learning_rate": 0.0002655606227394244,
"loss": 0.7749,
"step": 148000
},
{
"epoch": 3.53,
"learning_rate": 0.00026508885044818363,
"loss": 0.7747,
"step": 150000
},
{
"epoch": 3.58,
"learning_rate": 0.0002646170781569429,
"loss": 0.7816,
"step": 152000
},
{
"epoch": 3.63,
"learning_rate": 0.00026414530586570215,
"loss": 0.7807,
"step": 154000
},
{
"epoch": 3.67,
"learning_rate": 0.0002636735335744614,
"loss": 0.7817,
"step": 156000
},
{
"epoch": 3.72,
"learning_rate": 0.0002632017612832206,
"loss": 0.787,
"step": 158000
},
{
"epoch": 3.77,
"learning_rate": 0.00026272998899197985,
"loss": 0.7796,
"step": 160000
},
{
"epoch": 3.82,
"learning_rate": 0.0002622582167007391,
"loss": 0.7662,
"step": 162000
},
{
"epoch": 3.86,
"learning_rate": 0.0002617864444094983,
"loss": 0.7898,
"step": 164000
},
{
"epoch": 3.91,
"learning_rate": 0.00026131467211825755,
"loss": 0.7705,
"step": 166000
},
{
"epoch": 3.96,
"learning_rate": 0.0002608428998270168,
"loss": 0.7876,
"step": 168000
},
{
"epoch": 4.0,
"learning_rate": 0.00026037112753577607,
"loss": 0.7747,
"step": 170000
},
{
"epoch": 4.05,
"learning_rate": 0.0002598993552445353,
"loss": 0.7517,
"step": 172000
},
{
"epoch": 4.1,
"learning_rate": 0.00025942758295329453,
"loss": 0.7465,
"step": 174000
},
{
"epoch": 4.15,
"learning_rate": 0.00025895581066205376,
"loss": 0.7485,
"step": 176000
},
{
"epoch": 4.19,
"learning_rate": 0.000258484038370813,
"loss": 0.7531,
"step": 178000
},
{
"epoch": 4.24,
"learning_rate": 0.00025801226607957223,
"loss": 0.7521,
"step": 180000
},
{
"epoch": 4.29,
"learning_rate": 0.00025754049378833146,
"loss": 0.7477,
"step": 182000
},
{
"epoch": 4.33,
"learning_rate": 0.0002570687214970907,
"loss": 0.7621,
"step": 184000
},
{
"epoch": 4.38,
"learning_rate": 0.00025659694920585,
"loss": 0.7552,
"step": 186000
},
{
"epoch": 4.43,
"learning_rate": 0.0002561251769146092,
"loss": 0.7592,
"step": 188000
},
{
"epoch": 4.47,
"learning_rate": 0.00025565340462336845,
"loss": 0.7508,
"step": 190000
},
{
"epoch": 4.52,
"learning_rate": 0.0002551816323321277,
"loss": 0.7547,
"step": 192000
},
{
"epoch": 4.57,
"learning_rate": 0.0002547098600408869,
"loss": 0.7439,
"step": 194000
},
{
"epoch": 4.62,
"learning_rate": 0.00025423808774964615,
"loss": 0.762,
"step": 196000
},
{
"epoch": 4.66,
"learning_rate": 0.0002537663154584054,
"loss": 0.7507,
"step": 198000
},
{
"epoch": 4.71,
"learning_rate": 0.0002532945431671646,
"loss": 0.7553,
"step": 200000
},
{
"epoch": 4.76,
"learning_rate": 0.00025282277087592384,
"loss": 0.7498,
"step": 202000
},
{
"epoch": 4.8,
"learning_rate": 0.00025235099858468313,
"loss": 0.7466,
"step": 204000
},
{
"epoch": 4.85,
"learning_rate": 0.0002518792262934423,
"loss": 0.7496,
"step": 206000
},
{
"epoch": 4.9,
"learning_rate": 0.0002514074540022016,
"loss": 0.7406,
"step": 208000
},
{
"epoch": 4.95,
"learning_rate": 0.00025093568171096083,
"loss": 0.7447,
"step": 210000
},
{
"epoch": 4.99,
"learning_rate": 0.00025046390941972006,
"loss": 0.7532,
"step": 212000
},
{
"epoch": 5.04,
"learning_rate": 0.0002499921371284793,
"loss": 0.7339,
"step": 214000
},
{
"epoch": 5.09,
"learning_rate": 0.00024952036483723853,
"loss": 0.7214,
"step": 216000
},
{
"epoch": 5.13,
"learning_rate": 0.00024904859254599776,
"loss": 0.7227,
"step": 218000
},
{
"epoch": 5.18,
"learning_rate": 0.000248576820254757,
"loss": 0.7344,
"step": 220000
},
{
"epoch": 5.23,
"learning_rate": 0.0002481050479635163,
"loss": 0.7272,
"step": 222000
},
{
"epoch": 5.28,
"learning_rate": 0.00024763327567227546,
"loss": 0.7271,
"step": 224000
},
{
"epoch": 5.32,
"learning_rate": 0.00024716150338103475,
"loss": 0.7302,
"step": 226000
},
{
"epoch": 5.37,
"learning_rate": 0.000246689731089794,
"loss": 0.7218,
"step": 228000
},
{
"epoch": 5.42,
"learning_rate": 0.0002462179587985532,
"loss": 0.7242,
"step": 230000
},
{
"epoch": 5.46,
"learning_rate": 0.00024574618650731244,
"loss": 0.7306,
"step": 232000
},
{
"epoch": 5.51,
"learning_rate": 0.0002452744142160717,
"loss": 0.7351,
"step": 234000
},
{
"epoch": 5.56,
"learning_rate": 0.0002448026419248309,
"loss": 0.7225,
"step": 236000
},
{
"epoch": 5.61,
"learning_rate": 0.00024433086963359014,
"loss": 0.7256,
"step": 238000
},
{
"epoch": 5.65,
"learning_rate": 0.0002438590973423494,
"loss": 0.7256,
"step": 240000
},
{
"epoch": 5.7,
"learning_rate": 0.00024338732505110863,
"loss": 0.7293,
"step": 242000
},
{
"epoch": 5.75,
"learning_rate": 0.0002429155527598679,
"loss": 0.7272,
"step": 244000
},
{
"epoch": 5.79,
"learning_rate": 0.00024244378046862713,
"loss": 0.7154,
"step": 246000
},
{
"epoch": 5.84,
"learning_rate": 0.00024197200817738636,
"loss": 0.7274,
"step": 248000
},
{
"epoch": 5.89,
"learning_rate": 0.00024150023588614562,
"loss": 0.7184,
"step": 250000
},
{
"epoch": 5.93,
"learning_rate": 0.00024102846359490483,
"loss": 0.7218,
"step": 252000
},
{
"epoch": 5.98,
"learning_rate": 0.00024055669130366409,
"loss": 0.7246,
"step": 254000
},
{
"epoch": 6.03,
"learning_rate": 0.00024008491901242332,
"loss": 0.7036,
"step": 256000
},
{
"epoch": 6.08,
"learning_rate": 0.00023961314672118255,
"loss": 0.6992,
"step": 258000
},
{
"epoch": 6.12,
"learning_rate": 0.00023914137442994178,
"loss": 0.7029,
"step": 260000
},
{
"epoch": 6.17,
"learning_rate": 0.00023866960213870104,
"loss": 0.7097,
"step": 262000
},
{
"epoch": 6.22,
"learning_rate": 0.00023819782984746028,
"loss": 0.7042,
"step": 264000
},
{
"epoch": 6.26,
"learning_rate": 0.0002377260575562195,
"loss": 0.7076,
"step": 266000
},
{
"epoch": 6.31,
"learning_rate": 0.00023725428526497877,
"loss": 0.6972,
"step": 268000
},
{
"epoch": 6.36,
"learning_rate": 0.00023678251297373797,
"loss": 0.7097,
"step": 270000
},
{
"epoch": 6.41,
"learning_rate": 0.00023631074068249723,
"loss": 0.7025,
"step": 272000
},
{
"epoch": 6.45,
"learning_rate": 0.00023583896839125647,
"loss": 0.7041,
"step": 274000
},
{
"epoch": 6.5,
"learning_rate": 0.0002353671961000157,
"loss": 0.7098,
"step": 276000
},
{
"epoch": 6.55,
"learning_rate": 0.00023489542380877493,
"loss": 0.704,
"step": 278000
},
{
"epoch": 6.59,
"learning_rate": 0.0002344236515175342,
"loss": 0.7046,
"step": 280000
},
{
"epoch": 6.64,
"learning_rate": 0.00023395187922629343,
"loss": 0.701,
"step": 282000
},
{
"epoch": 6.69,
"learning_rate": 0.00023348010693505266,
"loss": 0.7107,
"step": 284000
},
{
"epoch": 6.74,
"learning_rate": 0.00023300833464381192,
"loss": 0.7107,
"step": 286000
},
{
"epoch": 6.78,
"learning_rate": 0.00023253656235257112,
"loss": 0.7051,
"step": 288000
},
{
"epoch": 6.83,
"learning_rate": 0.00023206479006133038,
"loss": 0.7055,
"step": 290000
},
{
"epoch": 6.88,
"learning_rate": 0.00023159301777008962,
"loss": 0.7004,
"step": 292000
},
{
"epoch": 6.92,
"learning_rate": 0.00023112124547884885,
"loss": 0.7051,
"step": 294000
},
{
"epoch": 6.97,
"learning_rate": 0.00023064947318760808,
"loss": 0.7025,
"step": 296000
},
{
"epoch": 7.02,
"learning_rate": 0.00023017770089636734,
"loss": 0.6976,
"step": 298000
},
{
"epoch": 7.07,
"learning_rate": 0.0002297059286051266,
"loss": 0.6785,
"step": 300000
},
{
"epoch": 7.11,
"learning_rate": 0.0002292341563138858,
"loss": 0.6773,
"step": 302000
},
{
"epoch": 7.16,
"learning_rate": 0.00022876238402264507,
"loss": 0.6763,
"step": 304000
},
{
"epoch": 7.21,
"learning_rate": 0.00022829061173140427,
"loss": 0.6774,
"step": 306000
},
{
"epoch": 7.25,
"learning_rate": 0.00022781883944016353,
"loss": 0.6786,
"step": 308000
},
{
"epoch": 7.3,
"learning_rate": 0.00022734706714892276,
"loss": 0.6885,
"step": 310000
},
{
"epoch": 7.35,
"learning_rate": 0.000226875294857682,
"loss": 0.6852,
"step": 312000
},
{
"epoch": 7.4,
"learning_rate": 0.00022640352256644123,
"loss": 0.6842,
"step": 314000
},
{
"epoch": 7.44,
"learning_rate": 0.0002259317502752005,
"loss": 0.6868,
"step": 316000
},
{
"epoch": 7.49,
"learning_rate": 0.00022545997798395975,
"loss": 0.6918,
"step": 318000
},
{
"epoch": 7.54,
"learning_rate": 0.00022498820569271896,
"loss": 0.6846,
"step": 320000
},
{
"epoch": 7.58,
"learning_rate": 0.00022451643340147822,
"loss": 0.6881,
"step": 322000
},
{
"epoch": 7.63,
"learning_rate": 0.00022404466111023742,
"loss": 0.69,
"step": 324000
},
{
"epoch": 7.68,
"learning_rate": 0.00022357288881899668,
"loss": 0.6823,
"step": 326000
},
{
"epoch": 7.72,
"learning_rate": 0.00022310111652775591,
"loss": 0.6896,
"step": 328000
},
{
"epoch": 7.77,
"learning_rate": 0.00022262934423651517,
"loss": 0.6862,
"step": 330000
},
{
"epoch": 7.82,
"learning_rate": 0.00022215757194527438,
"loss": 0.6858,
"step": 332000
},
{
"epoch": 7.87,
"learning_rate": 0.00022168579965403364,
"loss": 0.691,
"step": 334000
},
{
"epoch": 7.91,
"learning_rate": 0.00022121402736279284,
"loss": 0.6895,
"step": 336000
},
{
"epoch": 7.96,
"learning_rate": 0.0002207422550715521,
"loss": 0.6924,
"step": 338000
},
{
"epoch": 8.01,
"learning_rate": 0.00022027048278031136,
"loss": 0.6903,
"step": 340000
},
{
"epoch": 8.05,
"learning_rate": 0.0002197987104890706,
"loss": 0.6637,
"step": 342000
},
{
"epoch": 8.1,
"learning_rate": 0.00021932693819782983,
"loss": 0.6612,
"step": 344000
},
{
"epoch": 8.15,
"learning_rate": 0.00021885516590658906,
"loss": 0.6665,
"step": 346000
},
{
"epoch": 8.2,
"learning_rate": 0.00021838339361534832,
"loss": 0.6702,
"step": 348000
},
{
"epoch": 8.24,
"learning_rate": 0.00021791162132410753,
"loss": 0.6667,
"step": 350000
},
{
"epoch": 8.29,
"learning_rate": 0.0002174398490328668,
"loss": 0.6674,
"step": 352000
},
{
"epoch": 8.34,
"learning_rate": 0.000216968076741626,
"loss": 0.6719,
"step": 354000
},
{
"epoch": 8.38,
"learning_rate": 0.00021649630445038525,
"loss": 0.6671,
"step": 356000
},
{
"epoch": 8.43,
"learning_rate": 0.0002160245321591445,
"loss": 0.6647,
"step": 358000
},
{
"epoch": 8.48,
"learning_rate": 0.00021555275986790375,
"loss": 0.6671,
"step": 360000
},
{
"epoch": 8.53,
"learning_rate": 0.00021508098757666298,
"loss": 0.6681,
"step": 362000
},
{
"epoch": 8.57,
"learning_rate": 0.0002146092152854222,
"loss": 0.6727,
"step": 364000
},
{
"epoch": 8.62,
"learning_rate": 0.00021413744299418147,
"loss": 0.6767,
"step": 366000
},
{
"epoch": 8.67,
"learning_rate": 0.00021366567070294068,
"loss": 0.6749,
"step": 368000
},
{
"epoch": 8.71,
"learning_rate": 0.00021319389841169994,
"loss": 0.6704,
"step": 370000
},
{
"epoch": 8.76,
"learning_rate": 0.00021272212612045917,
"loss": 0.6729,
"step": 372000
},
{
"epoch": 8.81,
"learning_rate": 0.0002122503538292184,
"loss": 0.6641,
"step": 374000
},
{
"epoch": 8.86,
"learning_rate": 0.00021177858153797766,
"loss": 0.6678,
"step": 376000
},
{
"epoch": 8.9,
"learning_rate": 0.0002113068092467369,
"loss": 0.6677,
"step": 378000
},
{
"epoch": 8.95,
"learning_rate": 0.00021083503695549613,
"loss": 0.6683,
"step": 380000
},
{
"epoch": 9.0,
"learning_rate": 0.00021036326466425536,
"loss": 0.6746,
"step": 382000
},
{
"epoch": 9.04,
"learning_rate": 0.00020989149237301462,
"loss": 0.6469,
"step": 384000
},
{
"epoch": 9.09,
"learning_rate": 0.00020941972008177383,
"loss": 0.6496,
"step": 386000
},
{
"epoch": 9.14,
"learning_rate": 0.00020894794779053309,
"loss": 0.6513,
"step": 388000
},
{
"epoch": 9.19,
"learning_rate": 0.00020847617549929232,
"loss": 0.6516,
"step": 390000
},
{
"epoch": 9.23,
"learning_rate": 0.00020800440320805155,
"loss": 0.6556,
"step": 392000
},
{
"epoch": 9.28,
"learning_rate": 0.0002075326309168108,
"loss": 0.6477,
"step": 394000
},
{
"epoch": 9.33,
"learning_rate": 0.00020706085862557004,
"loss": 0.6571,
"step": 396000
},
{
"epoch": 9.37,
"learning_rate": 0.00020658908633432928,
"loss": 0.6479,
"step": 398000
},
{
"epoch": 9.42,
"learning_rate": 0.0002061173140430885,
"loss": 0.65,
"step": 400000
},
{
"epoch": 9.47,
"learning_rate": 0.00020564554175184777,
"loss": 0.6557,
"step": 402000
},
{
"epoch": 9.51,
"learning_rate": 0.00020517376946060697,
"loss": 0.6644,
"step": 404000
},
{
"epoch": 9.56,
"learning_rate": 0.00020470199716936623,
"loss": 0.6558,
"step": 406000
},
{
"epoch": 9.61,
"learning_rate": 0.00020423022487812547,
"loss": 0.6503,
"step": 408000
},
{
"epoch": 9.66,
"learning_rate": 0.0002037584525868847,
"loss": 0.6569,
"step": 410000
},
{
"epoch": 9.7,
"learning_rate": 0.00020328668029564396,
"loss": 0.66,
"step": 412000
},
{
"epoch": 9.75,
"learning_rate": 0.0002028149080044032,
"loss": 0.6528,
"step": 414000
},
{
"epoch": 9.8,
"learning_rate": 0.00020234313571316245,
"loss": 0.6493,
"step": 416000
},
{
"epoch": 9.84,
"learning_rate": 0.00020187136342192166,
"loss": 0.6599,
"step": 418000
},
{
"epoch": 9.89,
"learning_rate": 0.00020139959113068092,
"loss": 0.6534,
"step": 420000
},
{
"epoch": 9.94,
"learning_rate": 0.00020092781883944012,
"loss": 0.6546,
"step": 422000
},
{
"epoch": 9.99,
"learning_rate": 0.00020045604654819938,
"loss": 0.6488,
"step": 424000
},
{
"epoch": 10.03,
"learning_rate": 0.00019998427425695862,
"loss": 0.643,
"step": 426000
},
{
"epoch": 10.08,
"learning_rate": 0.00019951250196571788,
"loss": 0.6465,
"step": 428000
},
{
"epoch": 10.13,
"learning_rate": 0.0001990407296744771,
"loss": 0.6324,
"step": 430000
},
{
"epoch": 10.17,
"learning_rate": 0.00019856895738323634,
"loss": 0.6446,
"step": 432000
},
{
"epoch": 10.22,
"learning_rate": 0.0001980971850919956,
"loss": 0.6283,
"step": 434000
},
{
"epoch": 10.27,
"learning_rate": 0.0001976254128007548,
"loss": 0.635,
"step": 436000
},
{
"epoch": 10.32,
"learning_rate": 0.00019715364050951407,
"loss": 0.6333,
"step": 438000
},
{
"epoch": 10.36,
"learning_rate": 0.0001966818682182733,
"loss": 0.6378,
"step": 440000
},
{
"epoch": 10.41,
"learning_rate": 0.00019621009592703253,
"loss": 0.6369,
"step": 442000
},
{
"epoch": 10.46,
"learning_rate": 0.00019573832363579177,
"loss": 0.6348,
"step": 444000
},
{
"epoch": 10.5,
"learning_rate": 0.00019526655134455103,
"loss": 0.6439,
"step": 446000
},
{
"epoch": 10.55,
"learning_rate": 0.00019479477905331026,
"loss": 0.6342,
"step": 448000
},
{
"epoch": 10.6,
"learning_rate": 0.0001943230067620695,
"loss": 0.6424,
"step": 450000
},
{
"epoch": 10.65,
"learning_rate": 0.00019385123447082875,
"loss": 0.6408,
"step": 452000
},
{
"epoch": 10.69,
"learning_rate": 0.00019337946217958796,
"loss": 0.6413,
"step": 454000
},
{
"epoch": 10.74,
"learning_rate": 0.00019290768988834722,
"loss": 0.643,
"step": 456000
},
{
"epoch": 10.79,
"learning_rate": 0.00019243591759710645,
"loss": 0.6421,
"step": 458000
},
{
"epoch": 10.83,
"learning_rate": 0.00019196414530586568,
"loss": 0.6393,
"step": 460000
},
{
"epoch": 10.88,
"learning_rate": 0.00019149237301462491,
"loss": 0.6385,
"step": 462000
},
{
"epoch": 10.93,
"learning_rate": 0.00019102060072338417,
"loss": 0.6387,
"step": 464000
},
{
"epoch": 10.98,
"learning_rate": 0.00019054882843214338,
"loss": 0.6442,
"step": 466000
},
{
"epoch": 11.02,
"learning_rate": 0.00019007705614090264,
"loss": 0.6294,
"step": 468000
},
{
"epoch": 11.07,
"learning_rate": 0.0001896052838496619,
"loss": 0.6191,
"step": 470000
},
{
"epoch": 11.12,
"learning_rate": 0.0001891335115584211,
"loss": 0.6226,
"step": 472000
},
{
"epoch": 11.16,
"learning_rate": 0.00018866173926718036,
"loss": 0.6207,
"step": 474000
},
{
"epoch": 11.21,
"learning_rate": 0.0001881899669759396,
"loss": 0.6282,
"step": 476000
},
{
"epoch": 11.26,
"learning_rate": 0.00018771819468469883,
"loss": 0.615,
"step": 478000
},
{
"epoch": 11.3,
"learning_rate": 0.00018724642239345806,
"loss": 0.6201,
"step": 480000
},
{
"epoch": 11.35,
"learning_rate": 0.00018677465010221732,
"loss": 0.6199,
"step": 482000
},
{
"epoch": 11.4,
"learning_rate": 0.00018630287781097653,
"loss": 0.6211,
"step": 484000
},
{
"epoch": 11.45,
"learning_rate": 0.0001858311055197358,
"loss": 0.6372,
"step": 486000
},
{
"epoch": 11.49,
"learning_rate": 0.00018535933322849505,
"loss": 0.6268,
"step": 488000
},
{
"epoch": 11.54,
"learning_rate": 0.00018488756093725425,
"loss": 0.6283,
"step": 490000
},
{
"epoch": 11.59,
"learning_rate": 0.00018441578864601351,
"loss": 0.6398,
"step": 492000
},
{
"epoch": 11.63,
"learning_rate": 0.00018394401635477275,
"loss": 0.6282,
"step": 494000
},
{
"epoch": 11.68,
"learning_rate": 0.00018347224406353198,
"loss": 0.6327,
"step": 496000
},
{
"epoch": 11.73,
"learning_rate": 0.0001830004717722912,
"loss": 0.6226,
"step": 498000
},
{
"epoch": 11.78,
"learning_rate": 0.00018252869948105047,
"loss": 0.6321,
"step": 500000
},
{
"epoch": 11.82,
"learning_rate": 0.00018205692718980968,
"loss": 0.6244,
"step": 502000
},
{
"epoch": 11.87,
"learning_rate": 0.00018158515489856894,
"loss": 0.6257,
"step": 504000
},
{
"epoch": 11.92,
"learning_rate": 0.0001811133826073282,
"loss": 0.6291,
"step": 506000
},
{
"epoch": 11.96,
"learning_rate": 0.0001806416103160874,
"loss": 0.6283,
"step": 508000
},
{
"epoch": 12.01,
"learning_rate": 0.00018016983802484666,
"loss": 0.6246,
"step": 510000
},
{
"epoch": 12.06,
"learning_rate": 0.0001796980657336059,
"loss": 0.6133,
"step": 512000
},
{
"epoch": 12.11,
"learning_rate": 0.00017922629344236516,
"loss": 0.6069,
"step": 514000
},
{
"epoch": 12.15,
"learning_rate": 0.00017875452115112436,
"loss": 0.6163,
"step": 516000
},
{
"epoch": 12.2,
"learning_rate": 0.00017828274885988362,
"loss": 0.6131,
"step": 518000
},
{
"epoch": 12.25,
"learning_rate": 0.00017781097656864283,
"loss": 0.6096,
"step": 520000
},
{
"epoch": 12.29,
"learning_rate": 0.00017733920427740209,
"loss": 0.6073,
"step": 522000
},
{
"epoch": 12.34,
"learning_rate": 0.00017686743198616135,
"loss": 0.6068,
"step": 524000
},
{
"epoch": 12.39,
"learning_rate": 0.00017639565969492058,
"loss": 0.6114,
"step": 526000
},
{
"epoch": 12.44,
"learning_rate": 0.0001759238874036798,
"loss": 0.6189,
"step": 528000
},
{
"epoch": 12.48,
"learning_rate": 0.00017545211511243904,
"loss": 0.6111,
"step": 530000
},
{
"epoch": 12.53,
"learning_rate": 0.0001749803428211983,
"loss": 0.6085,
"step": 532000
},
{
"epoch": 12.58,
"learning_rate": 0.0001745085705299575,
"loss": 0.6164,
"step": 534000
},
{
"epoch": 12.62,
"learning_rate": 0.00017403679823871677,
"loss": 0.6169,
"step": 536000
},
{
"epoch": 12.67,
"learning_rate": 0.00017356502594747598,
"loss": 0.615,
"step": 538000
},
{
"epoch": 12.72,
"learning_rate": 0.00017309325365623524,
"loss": 0.6145,
"step": 540000
},
{
"epoch": 12.76,
"learning_rate": 0.0001726214813649945,
"loss": 0.6149,
"step": 542000
},
{
"epoch": 12.81,
"learning_rate": 0.00017214970907375373,
"loss": 0.6128,
"step": 544000
},
{
"epoch": 12.86,
"learning_rate": 0.00017167793678251296,
"loss": 0.6123,
"step": 546000
},
{
"epoch": 12.91,
"learning_rate": 0.0001712061644912722,
"loss": 0.6132,
"step": 548000
},
{
"epoch": 12.95,
"learning_rate": 0.00017073439220003145,
"loss": 0.6154,
"step": 550000
},
{
"epoch": 13.0,
"learning_rate": 0.00017026261990879066,
"loss": 0.6168,
"step": 552000
},
{
"epoch": 13.05,
"learning_rate": 0.00016979084761754992,
"loss": 0.5926,
"step": 554000
},
{
"epoch": 13.09,
"learning_rate": 0.00016931907532630915,
"loss": 0.5924,
"step": 556000
},
{
"epoch": 13.14,
"learning_rate": 0.00016884730303506838,
"loss": 0.5949,
"step": 558000
},
{
"epoch": 13.19,
"learning_rate": 0.00016837553074382764,
"loss": 0.5947,
"step": 560000
},
{
"epoch": 13.24,
"learning_rate": 0.00016790375845258688,
"loss": 0.5971,
"step": 562000
},
{
"epoch": 13.28,
"learning_rate": 0.0001674319861613461,
"loss": 0.5976,
"step": 564000
},
{
"epoch": 13.33,
"learning_rate": 0.00016696021387010534,
"loss": 0.5984,
"step": 566000
},
{
"epoch": 13.38,
"learning_rate": 0.0001664884415788646,
"loss": 0.606,
"step": 568000
},
{
"epoch": 13.42,
"learning_rate": 0.0001660166692876238,
"loss": 0.6091,
"step": 570000
},
{
"epoch": 13.47,
"learning_rate": 0.00016554489699638307,
"loss": 0.5927,
"step": 572000
},
{
"epoch": 13.52,
"learning_rate": 0.0001650731247051423,
"loss": 0.6067,
"step": 574000
},
{
"epoch": 13.57,
"learning_rate": 0.00016460135241390153,
"loss": 0.6024,
"step": 576000
},
{
"epoch": 13.61,
"learning_rate": 0.0001641295801226608,
"loss": 0.6055,
"step": 578000
},
{
"epoch": 13.66,
"learning_rate": 0.00016365780783142003,
"loss": 0.6003,
"step": 580000
},
{
"epoch": 13.71,
"learning_rate": 0.00016318603554017929,
"loss": 0.6076,
"step": 582000
},
{
"epoch": 13.75,
"learning_rate": 0.0001627142632489385,
"loss": 0.606,
"step": 584000
},
{
"epoch": 13.8,
"learning_rate": 0.00016224249095769775,
"loss": 0.5901,
"step": 586000
},
{
"epoch": 13.85,
"learning_rate": 0.00016177071866645696,
"loss": 0.6084,
"step": 588000
},
{
"epoch": 13.9,
"learning_rate": 0.00016129894637521622,
"loss": 0.6075,
"step": 590000
},
{
"epoch": 13.94,
"learning_rate": 0.00016082717408397545,
"loss": 0.6085,
"step": 592000
},
{
"epoch": 13.99,
"learning_rate": 0.00016035540179273468,
"loss": 0.6009,
"step": 594000
},
{
"epoch": 14.04,
"learning_rate": 0.00015988362950149391,
"loss": 0.5894,
"step": 596000
},
{
"epoch": 14.08,
"learning_rate": 0.00015941185721025317,
"loss": 0.5835,
"step": 598000
},
{
"epoch": 14.13,
"learning_rate": 0.00015894008491901243,
"loss": 0.5859,
"step": 600000
},
{
"epoch": 14.18,
"learning_rate": 0.00015846831262777164,
"loss": 0.5929,
"step": 602000
},
{
"epoch": 14.23,
"learning_rate": 0.0001579965403365309,
"loss": 0.59,
"step": 604000
},
{
"epoch": 14.27,
"learning_rate": 0.0001575247680452901,
"loss": 0.582,
"step": 606000
},
{
"epoch": 14.32,
"learning_rate": 0.00015705299575404937,
"loss": 0.588,
"step": 608000
},
{
"epoch": 14.37,
"learning_rate": 0.0001565812234628086,
"loss": 0.5923,
"step": 610000
},
{
"epoch": 14.41,
"learning_rate": 0.00015610945117156786,
"loss": 0.5837,
"step": 612000
},
{
"epoch": 14.46,
"learning_rate": 0.00015563767888032706,
"loss": 0.5898,
"step": 614000
},
{
"epoch": 14.51,
"learning_rate": 0.00015516590658908632,
"loss": 0.5904,
"step": 616000
},
{
"epoch": 14.55,
"learning_rate": 0.00015469413429784558,
"loss": 0.5892,
"step": 618000
},
{
"epoch": 14.6,
"learning_rate": 0.0001542223620066048,
"loss": 0.5887,
"step": 620000
},
{
"epoch": 14.65,
"learning_rate": 0.00015375058971536405,
"loss": 0.5843,
"step": 622000
},
{
"epoch": 14.7,
"learning_rate": 0.00015327881742412328,
"loss": 0.5853,
"step": 624000
},
{
"epoch": 14.74,
"learning_rate": 0.00015280704513288251,
"loss": 0.5919,
"step": 626000
},
{
"epoch": 14.79,
"learning_rate": 0.00015233527284164175,
"loss": 0.5915,
"step": 628000
},
{
"epoch": 14.84,
"learning_rate": 0.000151863500550401,
"loss": 0.5918,
"step": 630000
},
{
"epoch": 14.88,
"learning_rate": 0.0001513917282591602,
"loss": 0.5932,
"step": 632000
},
{
"epoch": 14.93,
"learning_rate": 0.00015091995596791947,
"loss": 0.5891,
"step": 634000
},
{
"epoch": 14.98,
"learning_rate": 0.00015044818367667873,
"loss": 0.5888,
"step": 636000
},
{
"epoch": 15.03,
"learning_rate": 0.00014997641138543794,
"loss": 0.5817,
"step": 638000
},
{
"epoch": 15.07,
"learning_rate": 0.00014950463909419717,
"loss": 0.574,
"step": 640000
},
{
"epoch": 15.12,
"learning_rate": 0.00014903286680295643,
"loss": 0.5723,
"step": 642000
},
{
"epoch": 15.17,
"learning_rate": 0.00014856109451171566,
"loss": 0.573,
"step": 644000
},
{
"epoch": 15.21,
"learning_rate": 0.00014808932222047492,
"loss": 0.5758,
"step": 646000
},
{
"epoch": 15.26,
"learning_rate": 0.00014761754992923416,
"loss": 0.5796,
"step": 648000
},
{
"epoch": 15.31,
"learning_rate": 0.0001471457776379934,
"loss": 0.5722,
"step": 650000
},
{
"epoch": 15.36,
"learning_rate": 0.00014667400534675262,
"loss": 0.5825,
"step": 652000
},
{
"epoch": 15.4,
"learning_rate": 0.00014620223305551185,
"loss": 0.5744,
"step": 654000
},
{
"epoch": 15.45,
"learning_rate": 0.0001457304607642711,
"loss": 0.5786,
"step": 656000
},
{
"epoch": 15.5,
"learning_rate": 0.00014525868847303032,
"loss": 0.5808,
"step": 658000
},
{
"epoch": 15.54,
"learning_rate": 0.00014478691618178958,
"loss": 0.5855,
"step": 660000
},
{
"epoch": 15.59,
"learning_rate": 0.0001443151438905488,
"loss": 0.585,
"step": 662000
},
{
"epoch": 15.64,
"learning_rate": 0.00014384337159930807,
"loss": 0.5692,
"step": 664000
},
{
"epoch": 15.69,
"learning_rate": 0.0001433715993080673,
"loss": 0.5789,
"step": 666000
},
{
"epoch": 15.73,
"learning_rate": 0.00014289982701682654,
"loss": 0.5836,
"step": 668000
},
{
"epoch": 15.78,
"learning_rate": 0.00014242805472558577,
"loss": 0.5796,
"step": 670000
},
{
"epoch": 15.83,
"learning_rate": 0.000141956282434345,
"loss": 0.5756,
"step": 672000
},
{
"epoch": 15.87,
"learning_rate": 0.00014148451014310424,
"loss": 0.5667,
"step": 674000
},
{
"epoch": 15.92,
"learning_rate": 0.0001410127378518635,
"loss": 0.5845,
"step": 676000
},
{
"epoch": 15.97,
"learning_rate": 0.00014054096556062273,
"loss": 0.5783,
"step": 678000
},
{
"epoch": 16.02,
"learning_rate": 0.00014006919326938196,
"loss": 0.5749,
"step": 680000
},
{
"epoch": 16.06,
"learning_rate": 0.00013959742097814122,
"loss": 0.5602,
"step": 682000
},
{
"epoch": 16.11,
"learning_rate": 0.00013912564868690045,
"loss": 0.564,
"step": 684000
},
{
"epoch": 16.16,
"learning_rate": 0.00013865387639565969,
"loss": 0.5659,
"step": 686000
},
{
"epoch": 16.2,
"learning_rate": 0.00013818210410441892,
"loss": 0.5608,
"step": 688000
},
{
"epoch": 16.25,
"learning_rate": 0.00013771033181317815,
"loss": 0.5668,
"step": 690000
},
{
"epoch": 16.3,
"learning_rate": 0.00013723855952193738,
"loss": 0.5646,
"step": 692000
},
{
"epoch": 16.34,
"learning_rate": 0.00013676678723069664,
"loss": 0.5663,
"step": 694000
},
{
"epoch": 16.39,
"learning_rate": 0.00013629501493945588,
"loss": 0.5716,
"step": 696000
},
{
"epoch": 16.44,
"learning_rate": 0.0001358232426482151,
"loss": 0.5716,
"step": 698000
},
{
"epoch": 16.49,
"learning_rate": 0.00013535147035697437,
"loss": 0.568,
"step": 700000
},
{
"epoch": 16.53,
"learning_rate": 0.0001348796980657336,
"loss": 0.566,
"step": 702000
},
{
"epoch": 16.58,
"learning_rate": 0.00013440792577449284,
"loss": 0.5657,
"step": 704000
},
{
"epoch": 16.63,
"learning_rate": 0.00013393615348325207,
"loss": 0.5641,
"step": 706000
},
{
"epoch": 16.67,
"learning_rate": 0.0001334643811920113,
"loss": 0.5679,
"step": 708000
},
{
"epoch": 16.72,
"learning_rate": 0.00013299260890077056,
"loss": 0.5652,
"step": 710000
},
{
"epoch": 16.77,
"learning_rate": 0.0001325208366095298,
"loss": 0.5672,
"step": 712000
},
{
"epoch": 16.82,
"learning_rate": 0.00013204906431828903,
"loss": 0.5695,
"step": 714000
},
{
"epoch": 16.86,
"learning_rate": 0.00013157729202704826,
"loss": 0.5719,
"step": 716000
},
{
"epoch": 16.91,
"learning_rate": 0.00013110551973580752,
"loss": 0.571,
"step": 718000
},
{
"epoch": 16.96,
"learning_rate": 0.00013063374744456675,
"loss": 0.5684,
"step": 720000
},
{
"epoch": 17.0,
"learning_rate": 0.00013016197515332598,
"loss": 0.5672,
"step": 722000
},
{
"epoch": 17.05,
"learning_rate": 0.00012969020286208522,
"loss": 0.5566,
"step": 724000
},
{
"epoch": 17.1,
"learning_rate": 0.00012921843057084445,
"loss": 0.5459,
"step": 726000
},
{
"epoch": 17.15,
"learning_rate": 0.0001287466582796037,
"loss": 0.5522,
"step": 728000
},
{
"epoch": 17.19,
"learning_rate": 0.00012827488598836294,
"loss": 0.5586,
"step": 730000
},
{
"epoch": 17.24,
"learning_rate": 0.00012780311369712217,
"loss": 0.5551,
"step": 732000
},
{
"epoch": 17.29,
"learning_rate": 0.0001273313414058814,
"loss": 0.558,
"step": 734000
},
{
"epoch": 17.33,
"learning_rate": 0.00012685956911464067,
"loss": 0.5501,
"step": 736000
},
{
"epoch": 17.38,
"learning_rate": 0.0001263877968233999,
"loss": 0.5496,
"step": 738000
},
{
"epoch": 17.43,
"learning_rate": 0.00012591602453215913,
"loss": 0.5617,
"step": 740000
},
{
"epoch": 17.48,
"learning_rate": 0.00012544425224091837,
"loss": 0.5481,
"step": 742000
},
{
"epoch": 17.52,
"learning_rate": 0.00012497247994967763,
"loss": 0.5627,
"step": 744000
},
{
"epoch": 17.57,
"learning_rate": 0.00012450070765843686,
"loss": 0.5556,
"step": 746000
},
{
"epoch": 17.62,
"learning_rate": 0.0001240289353671961,
"loss": 0.5565,
"step": 748000
},
{
"epoch": 17.66,
"learning_rate": 0.00012355716307595532,
"loss": 0.5591,
"step": 750000
},
{
"epoch": 17.71,
"learning_rate": 0.00012308539078471456,
"loss": 0.5526,
"step": 752000
},
{
"epoch": 17.76,
"learning_rate": 0.00012261361849347382,
"loss": 0.5564,
"step": 754000
},
{
"epoch": 17.8,
"learning_rate": 0.00012214184620223305,
"loss": 0.5541,
"step": 756000
},
{
"epoch": 17.85,
"learning_rate": 0.00012167007391099228,
"loss": 0.5601,
"step": 758000
},
{
"epoch": 17.9,
"learning_rate": 0.00012119830161975153,
"loss": 0.5555,
"step": 760000
},
{
"epoch": 17.95,
"learning_rate": 0.00012072652932851076,
"loss": 0.5525,
"step": 762000
},
{
"epoch": 17.99,
"learning_rate": 0.00012025475703727,
"loss": 0.5577,
"step": 764000
},
{
"epoch": 18.04,
"learning_rate": 0.00011978298474602924,
"loss": 0.5403,
"step": 766000
},
{
"epoch": 18.09,
"learning_rate": 0.00011931121245478847,
"loss": 0.5466,
"step": 768000
},
{
"epoch": 18.13,
"learning_rate": 0.0001188394401635477,
"loss": 0.546,
"step": 770000
},
{
"epoch": 18.18,
"learning_rate": 0.00011836766787230695,
"loss": 0.5435,
"step": 772000
},
{
"epoch": 18.23,
"learning_rate": 0.0001178958955810662,
"loss": 0.5442,
"step": 774000
},
{
"epoch": 18.28,
"learning_rate": 0.00011742412328982544,
"loss": 0.5381,
"step": 776000
},
{
"epoch": 18.32,
"learning_rate": 0.00011695235099858468,
"loss": 0.542,
"step": 778000
},
{
"epoch": 18.37,
"learning_rate": 0.00011648057870734391,
"loss": 0.5458,
"step": 780000
},
{
"epoch": 18.42,
"learning_rate": 0.00011600880641610316,
"loss": 0.5473,
"step": 782000
},
{
"epoch": 18.46,
"learning_rate": 0.00011553703412486239,
"loss": 0.5511,
"step": 784000
},
{
"epoch": 18.51,
"learning_rate": 0.00011506526183362162,
"loss": 0.5415,
"step": 786000
},
{
"epoch": 18.56,
"learning_rate": 0.00011459348954238087,
"loss": 0.5431,
"step": 788000
},
{
"epoch": 18.61,
"learning_rate": 0.0001141217172511401,
"loss": 0.5403,
"step": 790000
},
{
"epoch": 18.65,
"learning_rate": 0.00011364994495989935,
"loss": 0.5402,
"step": 792000
},
{
"epoch": 18.7,
"learning_rate": 0.00011317817266865859,
"loss": 0.5447,
"step": 794000
},
{
"epoch": 18.75,
"learning_rate": 0.00011270640037741783,
"loss": 0.5485,
"step": 796000
},
{
"epoch": 18.79,
"learning_rate": 0.00011223462808617706,
"loss": 0.5485,
"step": 798000
},
{
"epoch": 18.84,
"learning_rate": 0.0001117628557949363,
"loss": 0.5441,
"step": 800000
},
{
"epoch": 18.89,
"learning_rate": 0.00011129108350369554,
"loss": 0.5457,
"step": 802000
},
{
"epoch": 18.94,
"learning_rate": 0.00011081931121245477,
"loss": 0.5416,
"step": 804000
},
{
"epoch": 18.98,
"learning_rate": 0.00011034753892121402,
"loss": 0.5494,
"step": 806000
},
{
"epoch": 19.03,
"learning_rate": 0.00010987576662997325,
"loss": 0.5347,
"step": 808000
},
{
"epoch": 19.08,
"learning_rate": 0.00010940399433873251,
"loss": 0.5257,
"step": 810000
},
{
"epoch": 19.12,
"learning_rate": 0.00010893222204749174,
"loss": 0.5278,
"step": 812000
},
{
"epoch": 19.17,
"learning_rate": 0.00010846044975625097,
"loss": 0.5355,
"step": 814000
},
{
"epoch": 19.22,
"learning_rate": 0.00010798867746501022,
"loss": 0.5378,
"step": 816000
},
{
"epoch": 19.27,
"learning_rate": 0.00010751690517376945,
"loss": 0.537,
"step": 818000
},
{
"epoch": 19.31,
"learning_rate": 0.00010704513288252869,
"loss": 0.5385,
"step": 820000
},
{
"epoch": 19.36,
"learning_rate": 0.00010657336059128793,
"loss": 0.5302,
"step": 822000
},
{
"epoch": 19.41,
"learning_rate": 0.00010610158830004717,
"loss": 0.5409,
"step": 824000
},
{
"epoch": 19.45,
"learning_rate": 0.0001056298160088064,
"loss": 0.5347,
"step": 826000
},
{
"epoch": 19.5,
"learning_rate": 0.00010515804371756563,
"loss": 0.5288,
"step": 828000
},
{
"epoch": 19.55,
"learning_rate": 0.00010468627142632489,
"loss": 0.5276,
"step": 830000
},
{
"epoch": 19.59,
"learning_rate": 0.00010421449913508412,
"loss": 0.5334,
"step": 832000
},
{
"epoch": 19.64,
"learning_rate": 0.00010374272684384337,
"loss": 0.5407,
"step": 834000
},
{
"epoch": 19.69,
"learning_rate": 0.0001032709545526026,
"loss": 0.5354,
"step": 836000
},
{
"epoch": 19.74,
"learning_rate": 0.00010279918226136184,
"loss": 0.5274,
"step": 838000
},
{
"epoch": 19.78,
"learning_rate": 0.00010232740997012108,
"loss": 0.529,
"step": 840000
},
{
"epoch": 19.83,
"learning_rate": 0.00010185563767888031,
"loss": 0.5327,
"step": 842000
},
{
"epoch": 19.88,
"learning_rate": 0.00010138386538763955,
"loss": 0.5383,
"step": 844000
},
{
"epoch": 19.92,
"learning_rate": 0.0001009120930963988,
"loss": 0.5352,
"step": 846000
},
{
"epoch": 19.97,
"learning_rate": 0.00010044032080515804,
"loss": 0.5355,
"step": 848000
},
{
"epoch": 20.02,
"learning_rate": 9.996854851391727e-05,
"loss": 0.5233,
"step": 850000
},
{
"epoch": 20.07,
"learning_rate": 9.949677622267652e-05,
"loss": 0.5255,
"step": 852000
},
{
"epoch": 20.11,
"learning_rate": 9.902500393143575e-05,
"loss": 0.5168,
"step": 854000
},
{
"epoch": 20.16,
"learning_rate": 9.855323164019498e-05,
"loss": 0.5181,
"step": 856000
},
{
"epoch": 20.21,
"learning_rate": 9.808145934895423e-05,
"loss": 0.5245,
"step": 858000
},
{
"epoch": 20.25,
"learning_rate": 9.760968705771346e-05,
"loss": 0.5191,
"step": 860000
},
{
"epoch": 20.3,
"learning_rate": 9.71379147664727e-05,
"loss": 0.5207,
"step": 862000
},
{
"epoch": 20.35,
"learning_rate": 9.666614247523194e-05,
"loss": 0.5251,
"step": 864000
},
{
"epoch": 20.4,
"learning_rate": 9.619437018399119e-05,
"loss": 0.5158,
"step": 866000
},
{
"epoch": 20.44,
"learning_rate": 9.572259789275043e-05,
"loss": 0.5178,
"step": 868000
},
{
"epoch": 20.49,
"learning_rate": 9.525082560150967e-05,
"loss": 0.5213,
"step": 870000
},
{
"epoch": 20.54,
"learning_rate": 9.47790533102689e-05,
"loss": 0.5271,
"step": 872000
},
{
"epoch": 20.58,
"learning_rate": 9.430728101902815e-05,
"loss": 0.5242,
"step": 874000
},
{
"epoch": 20.63,
"learning_rate": 9.383550872778738e-05,
"loss": 0.5278,
"step": 876000
},
{
"epoch": 20.68,
"learning_rate": 9.336373643654661e-05,
"loss": 0.5222,
"step": 878000
},
{
"epoch": 20.73,
"learning_rate": 9.289196414530586e-05,
"loss": 0.5242,
"step": 880000
},
{
"epoch": 20.77,
"learning_rate": 9.242019185406509e-05,
"loss": 0.5271,
"step": 882000
},
{
"epoch": 20.82,
"learning_rate": 9.194841956282434e-05,
"loss": 0.5267,
"step": 884000
},
{
"epoch": 20.87,
"learning_rate": 9.147664727158358e-05,
"loss": 0.5195,
"step": 886000
},
{
"epoch": 20.91,
"learning_rate": 9.100487498034282e-05,
"loss": 0.5162,
"step": 888000
},
{
"epoch": 20.96,
"learning_rate": 9.053310268910205e-05,
"loss": 0.5166,
"step": 890000
},
{
"epoch": 21.01,
"learning_rate": 9.00613303978613e-05,
"loss": 0.5266,
"step": 892000
},
{
"epoch": 21.06,
"learning_rate": 8.958955810662053e-05,
"loss": 0.5149,
"step": 894000
},
{
"epoch": 21.1,
"learning_rate": 8.911778581537976e-05,
"loss": 0.5023,
"step": 896000
},
{
"epoch": 21.15,
"learning_rate": 8.864601352413901e-05,
"loss": 0.5112,
"step": 898000
},
{
"epoch": 21.2,
"learning_rate": 8.817424123289824e-05,
"loss": 0.5084,
"step": 900000
},
{
"epoch": 21.24,
"learning_rate": 8.770246894165747e-05,
"loss": 0.509,
"step": 902000
},
{
"epoch": 21.29,
"learning_rate": 8.723069665041673e-05,
"loss": 0.5043,
"step": 904000
},
{
"epoch": 21.34,
"learning_rate": 8.675892435917597e-05,
"loss": 0.5188,
"step": 906000
},
{
"epoch": 21.38,
"learning_rate": 8.628715206793521e-05,
"loss": 0.5094,
"step": 908000
},
{
"epoch": 21.43,
"learning_rate": 8.581537977669444e-05,
"loss": 0.519,
"step": 910000
},
{
"epoch": 21.48,
"learning_rate": 8.534360748545368e-05,
"loss": 0.5108,
"step": 912000
},
{
"epoch": 21.53,
"learning_rate": 8.487183519421292e-05,
"loss": 0.5054,
"step": 914000
},
{
"epoch": 21.57,
"learning_rate": 8.440006290297216e-05,
"loss": 0.5092,
"step": 916000
},
{
"epoch": 21.62,
"learning_rate": 8.392829061173139e-05,
"loss": 0.5075,
"step": 918000
},
{
"epoch": 21.67,
"learning_rate": 8.345651832049062e-05,
"loss": 0.5024,
"step": 920000
},
{
"epoch": 21.71,
"learning_rate": 8.298474602924988e-05,
"loss": 0.5161,
"step": 922000
},
{
"epoch": 21.76,
"learning_rate": 8.251297373800911e-05,
"loss": 0.5117,
"step": 924000
},
{
"epoch": 21.81,
"learning_rate": 8.204120144676836e-05,
"loss": 0.5136,
"step": 926000
},
{
"epoch": 21.86,
"learning_rate": 8.15694291555276e-05,
"loss": 0.5062,
"step": 928000
},
{
"epoch": 21.9,
"learning_rate": 8.109765686428683e-05,
"loss": 0.5122,
"step": 930000
},
{
"epoch": 21.95,
"learning_rate": 8.062588457304607e-05,
"loss": 0.5113,
"step": 932000
},
{
"epoch": 22.0,
"learning_rate": 8.01541122818053e-05,
"loss": 0.5073,
"step": 934000
},
{
"epoch": 22.04,
"learning_rate": 7.968233999056454e-05,
"loss": 0.4986,
"step": 936000
},
{
"epoch": 22.09,
"learning_rate": 7.921056769932378e-05,
"loss": 0.5061,
"step": 938000
},
{
"epoch": 22.14,
"learning_rate": 7.873879540808303e-05,
"loss": 0.4963,
"step": 940000
},
{
"epoch": 22.19,
"learning_rate": 7.826702311684228e-05,
"loss": 0.4981,
"step": 942000
},
{
"epoch": 22.23,
"learning_rate": 7.779525082560151e-05,
"loss": 0.498,
"step": 944000
},
{
"epoch": 22.28,
"learning_rate": 7.732347853436074e-05,
"loss": 0.4963,
"step": 946000
},
{
"epoch": 22.33,
"learning_rate": 7.685170624311998e-05,
"loss": 0.4987,
"step": 948000
},
{
"epoch": 22.37,
"learning_rate": 7.637993395187922e-05,
"loss": 0.4999,
"step": 950000
},
{
"epoch": 22.42,
"learning_rate": 7.590816166063845e-05,
"loss": 0.4945,
"step": 952000
},
{
"epoch": 22.47,
"learning_rate": 7.543638936939769e-05,
"loss": 0.4986,
"step": 954000
},
{
"epoch": 22.52,
"learning_rate": 7.496461707815693e-05,
"loss": 0.4956,
"step": 956000
},
{
"epoch": 22.56,
"learning_rate": 7.449284478691618e-05,
"loss": 0.4958,
"step": 958000
},
{
"epoch": 22.61,
"learning_rate": 7.402107249567541e-05,
"loss": 0.5033,
"step": 960000
},
{
"epoch": 22.66,
"learning_rate": 7.354930020443464e-05,
"loss": 0.4976,
"step": 962000
},
{
"epoch": 22.7,
"learning_rate": 7.307752791319389e-05,
"loss": 0.5036,
"step": 964000
},
{
"epoch": 22.75,
"learning_rate": 7.260575562195314e-05,
"loss": 0.4973,
"step": 966000
},
{
"epoch": 22.8,
"learning_rate": 7.213398333071237e-05,
"loss": 0.501,
"step": 968000
},
{
"epoch": 22.85,
"learning_rate": 7.16622110394716e-05,
"loss": 0.5005,
"step": 970000
},
{
"epoch": 22.89,
"learning_rate": 7.119043874823085e-05,
"loss": 0.4991,
"step": 972000
},
{
"epoch": 22.94,
"learning_rate": 7.07186664569901e-05,
"loss": 0.495,
"step": 974000
},
{
"epoch": 22.99,
"learning_rate": 7.024689416574933e-05,
"loss": 0.5016,
"step": 976000
},
{
"epoch": 23.03,
"learning_rate": 6.977512187450856e-05,
"loss": 0.4966,
"step": 978000
},
{
"epoch": 23.08,
"learning_rate": 6.93033495832678e-05,
"loss": 0.4909,
"step": 980000
},
{
"epoch": 23.13,
"learning_rate": 6.883157729202704e-05,
"loss": 0.4846,
"step": 982000
},
{
"epoch": 23.17,
"learning_rate": 6.835980500078629e-05,
"loss": 0.4938,
"step": 984000
},
{
"epoch": 23.22,
"learning_rate": 6.788803270954552e-05,
"loss": 0.4967,
"step": 986000
},
{
"epoch": 23.27,
"learning_rate": 6.741626041830475e-05,
"loss": 0.4894,
"step": 988000
},
{
"epoch": 23.32,
"learning_rate": 6.6944488127064e-05,
"loss": 0.4912,
"step": 990000
},
{
"epoch": 23.36,
"learning_rate": 6.647271583582324e-05,
"loss": 0.4898,
"step": 992000
},
{
"epoch": 23.41,
"learning_rate": 6.600094354458248e-05,
"loss": 0.4854,
"step": 994000
},
{
"epoch": 23.46,
"learning_rate": 6.552917125334171e-05,
"loss": 0.4838,
"step": 996000
},
{
"epoch": 23.5,
"learning_rate": 6.505739896210096e-05,
"loss": 0.4846,
"step": 998000
},
{
"epoch": 23.55,
"learning_rate": 6.45856266708602e-05,
"loss": 0.4829,
"step": 1000000
},
{
"epoch": 23.6,
"learning_rate": 6.411385437961944e-05,
"loss": 0.4792,
"step": 1002000
},
{
"epoch": 23.65,
"learning_rate": 6.364208208837867e-05,
"loss": 0.486,
"step": 1004000
},
{
"epoch": 23.69,
"learning_rate": 6.317030979713791e-05,
"loss": 0.4946,
"step": 1006000
},
{
"epoch": 23.74,
"learning_rate": 6.269853750589715e-05,
"loss": 0.4825,
"step": 1008000
},
{
"epoch": 23.79,
"learning_rate": 6.22267652146564e-05,
"loss": 0.4845,
"step": 1010000
},
{
"epoch": 23.83,
"learning_rate": 6.175499292341563e-05,
"loss": 0.4875,
"step": 1012000
},
{
"epoch": 23.88,
"learning_rate": 6.128322063217486e-05,
"loss": 0.4935,
"step": 1014000
},
{
"epoch": 23.93,
"learning_rate": 6.0811448340934105e-05,
"loss": 0.4834,
"step": 1016000
},
{
"epoch": 23.98,
"learning_rate": 6.033967604969334e-05,
"loss": 0.4853,
"step": 1018000
},
{
"epoch": 24.02,
"learning_rate": 5.9867903758452584e-05,
"loss": 0.485,
"step": 1020000
},
{
"epoch": 24.07,
"learning_rate": 5.9396131467211824e-05,
"loss": 0.4821,
"step": 1022000
},
{
"epoch": 24.12,
"learning_rate": 5.8924359175971057e-05,
"loss": 0.4739,
"step": 1024000
},
{
"epoch": 24.16,
"learning_rate": 5.8452586884730296e-05,
"loss": 0.4824,
"step": 1026000
},
{
"epoch": 24.21,
"learning_rate": 5.798081459348954e-05,
"loss": 0.4823,
"step": 1028000
},
{
"epoch": 24.26,
"learning_rate": 5.7509042302248775e-05,
"loss": 0.4805,
"step": 1030000
},
{
"epoch": 24.31,
"learning_rate": 5.7037270011008015e-05,
"loss": 0.4727,
"step": 1032000
},
{
"epoch": 24.35,
"learning_rate": 5.6565497719767254e-05,
"loss": 0.4726,
"step": 1034000
},
{
"epoch": 24.4,
"learning_rate": 5.609372542852649e-05,
"loss": 0.4734,
"step": 1036000
},
{
"epoch": 24.45,
"learning_rate": 5.562195313728573e-05,
"loss": 0.4775,
"step": 1038000
},
{
"epoch": 24.49,
"learning_rate": 5.515018084604497e-05,
"loss": 0.4761,
"step": 1040000
},
{
"epoch": 24.54,
"learning_rate": 5.467840855480421e-05,
"loss": 0.4692,
"step": 1042000
},
{
"epoch": 24.59,
"learning_rate": 5.4206636263563445e-05,
"loss": 0.4826,
"step": 1044000
},
{
"epoch": 24.63,
"learning_rate": 5.3734863972322684e-05,
"loss": 0.4767,
"step": 1046000
},
{
"epoch": 24.68,
"learning_rate": 5.326309168108193e-05,
"loss": 0.4733,
"step": 1048000
},
{
"epoch": 24.73,
"learning_rate": 5.2791319389841163e-05,
"loss": 0.4772,
"step": 1050000
},
{
"epoch": 24.78,
"learning_rate": 5.23195470986004e-05,
"loss": 0.4758,
"step": 1052000
},
{
"epoch": 24.82,
"learning_rate": 5.184777480735964e-05,
"loss": 0.48,
"step": 1054000
},
{
"epoch": 24.87,
"learning_rate": 5.137600251611889e-05,
"loss": 0.477,
"step": 1056000
},
{
"epoch": 24.92,
"learning_rate": 5.090423022487812e-05,
"loss": 0.4715,
"step": 1058000
},
{
"epoch": 24.96,
"learning_rate": 5.043245793363736e-05,
"loss": 0.4696,
"step": 1060000
},
{
"epoch": 25.01,
"learning_rate": 4.99606856423966e-05,
"loss": 0.4682,
"step": 1062000
},
{
"epoch": 25.06,
"learning_rate": 4.948891335115583e-05,
"loss": 0.4725,
"step": 1064000
},
{
"epoch": 25.11,
"learning_rate": 4.901714105991508e-05,
"loss": 0.4655,
"step": 1066000
},
{
"epoch": 25.15,
"learning_rate": 4.854536876867432e-05,
"loss": 0.4629,
"step": 1068000
},
{
"epoch": 25.2,
"learning_rate": 4.807359647743355e-05,
"loss": 0.4654,
"step": 1070000
},
{
"epoch": 25.25,
"learning_rate": 4.760182418619279e-05,
"loss": 0.4663,
"step": 1072000
},
{
"epoch": 25.29,
"learning_rate": 4.713005189495203e-05,
"loss": 0.4682,
"step": 1074000
},
{
"epoch": 25.34,
"learning_rate": 4.665827960371127e-05,
"loss": 0.4647,
"step": 1076000
},
{
"epoch": 25.39,
"learning_rate": 4.618650731247051e-05,
"loss": 0.4722,
"step": 1078000
},
{
"epoch": 25.44,
"learning_rate": 4.571473502122975e-05,
"loss": 0.4604,
"step": 1080000
},
{
"epoch": 25.48,
"learning_rate": 4.524296272998898e-05,
"loss": 0.4585,
"step": 1082000
},
{
"epoch": 25.53,
"learning_rate": 4.477119043874823e-05,
"loss": 0.4619,
"step": 1084000
},
{
"epoch": 25.58,
"learning_rate": 4.429941814750747e-05,
"loss": 0.4661,
"step": 1086000
},
{
"epoch": 25.62,
"learning_rate": 4.382764585626671e-05,
"loss": 0.4724,
"step": 1088000
},
{
"epoch": 25.67,
"learning_rate": 4.335587356502594e-05,
"loss": 0.4667,
"step": 1090000
},
{
"epoch": 25.72,
"learning_rate": 4.288410127378518e-05,
"loss": 0.4692,
"step": 1092000
},
{
"epoch": 25.77,
"learning_rate": 4.2412328982544426e-05,
"loss": 0.4624,
"step": 1094000
},
{
"epoch": 25.81,
"learning_rate": 4.194055669130366e-05,
"loss": 0.4587,
"step": 1096000
},
{
"epoch": 25.86,
"learning_rate": 4.14687844000629e-05,
"loss": 0.4604,
"step": 1098000
},
{
"epoch": 25.91,
"learning_rate": 4.099701210882214e-05,
"loss": 0.4718,
"step": 1100000
},
{
"epoch": 25.95,
"learning_rate": 4.052523981758137e-05,
"loss": 0.4666,
"step": 1102000
},
{
"epoch": 26.0,
"learning_rate": 4.005346752634062e-05,
"loss": 0.4616,
"step": 1104000
},
{
"epoch": 26.05,
"learning_rate": 3.9581695235099856e-05,
"loss": 0.456,
"step": 1106000
},
{
"epoch": 26.1,
"learning_rate": 3.9109922943859096e-05,
"loss": 0.4573,
"step": 1108000
},
{
"epoch": 26.14,
"learning_rate": 3.863815065261833e-05,
"loss": 0.4522,
"step": 1110000
},
{
"epoch": 26.19,
"learning_rate": 3.8166378361377575e-05,
"loss": 0.4604,
"step": 1112000
},
{
"epoch": 26.24,
"learning_rate": 3.7694606070136815e-05,
"loss": 0.4539,
"step": 1114000
},
{
"epoch": 26.28,
"learning_rate": 3.722283377889605e-05,
"loss": 0.4485,
"step": 1116000
},
{
"epoch": 26.33,
"learning_rate": 3.675106148765529e-05,
"loss": 0.4596,
"step": 1118000
},
{
"epoch": 26.38,
"learning_rate": 3.6279289196414526e-05,
"loss": 0.4487,
"step": 1120000
},
{
"epoch": 26.42,
"learning_rate": 3.5807516905173766e-05,
"loss": 0.459,
"step": 1122000
},
{
"epoch": 26.47,
"learning_rate": 3.5335744613933005e-05,
"loss": 0.4609,
"step": 1124000
},
{
"epoch": 26.52,
"learning_rate": 3.4863972322692245e-05,
"loss": 0.4549,
"step": 1126000
},
{
"epoch": 26.57,
"learning_rate": 3.4392200031451484e-05,
"loss": 0.4553,
"step": 1128000
},
{
"epoch": 26.61,
"learning_rate": 3.3920427740210724e-05,
"loss": 0.4552,
"step": 1130000
},
{
"epoch": 26.66,
"learning_rate": 3.344865544896996e-05,
"loss": 0.4534,
"step": 1132000
},
{
"epoch": 26.71,
"learning_rate": 3.29768831577292e-05,
"loss": 0.4522,
"step": 1134000
},
{
"epoch": 26.75,
"learning_rate": 3.2505110866488436e-05,
"loss": 0.4544,
"step": 1136000
},
{
"epoch": 26.8,
"learning_rate": 3.203333857524768e-05,
"loss": 0.458,
"step": 1138000
},
{
"epoch": 26.85,
"learning_rate": 3.1561566284006915e-05,
"loss": 0.4537,
"step": 1140000
},
{
"epoch": 26.9,
"learning_rate": 3.1089793992766154e-05,
"loss": 0.4581,
"step": 1142000
},
{
"epoch": 26.94,
"learning_rate": 3.0618021701525394e-05,
"loss": 0.4525,
"step": 1144000
},
{
"epoch": 26.99,
"learning_rate": 3.014624941028463e-05,
"loss": 0.4562,
"step": 1146000
},
{
"epoch": 27.04,
"learning_rate": 2.9674477119043873e-05,
"loss": 0.4516,
"step": 1148000
},
{
"epoch": 27.08,
"learning_rate": 2.920270482780311e-05,
"loss": 0.4472,
"step": 1150000
},
{
"epoch": 27.13,
"learning_rate": 2.8730932536562352e-05,
"loss": 0.4507,
"step": 1152000
},
{
"epoch": 27.18,
"learning_rate": 2.8259160245321588e-05,
"loss": 0.4421,
"step": 1154000
},
{
"epoch": 27.23,
"learning_rate": 2.778738795408083e-05,
"loss": 0.4487,
"step": 1156000
},
{
"epoch": 27.27,
"learning_rate": 2.7315615662840067e-05,
"loss": 0.4528,
"step": 1158000
},
{
"epoch": 27.32,
"learning_rate": 2.6843843371599303e-05,
"loss": 0.4483,
"step": 1160000
},
{
"epoch": 27.37,
"learning_rate": 2.6372071080358546e-05,
"loss": 0.45,
"step": 1162000
},
{
"epoch": 27.41,
"learning_rate": 2.5900298789117782e-05,
"loss": 0.4517,
"step": 1164000
},
{
"epoch": 27.46,
"learning_rate": 2.5428526497877022e-05,
"loss": 0.4444,
"step": 1166000
},
{
"epoch": 27.51,
"learning_rate": 2.495675420663626e-05,
"loss": 0.4457,
"step": 1168000
},
{
"epoch": 27.56,
"learning_rate": 2.44849819153955e-05,
"loss": 0.4471,
"step": 1170000
},
{
"epoch": 27.6,
"learning_rate": 2.401320962415474e-05,
"loss": 0.4455,
"step": 1172000
},
{
"epoch": 27.65,
"learning_rate": 2.3541437332913976e-05,
"loss": 0.4466,
"step": 1174000
},
{
"epoch": 27.7,
"learning_rate": 2.3069665041673216e-05,
"loss": 0.4421,
"step": 1176000
},
{
"epoch": 27.74,
"learning_rate": 2.2597892750432455e-05,
"loss": 0.446,
"step": 1178000
},
{
"epoch": 27.79,
"learning_rate": 2.2126120459191695e-05,
"loss": 0.4441,
"step": 1180000
},
{
"epoch": 27.84,
"learning_rate": 2.1654348167950935e-05,
"loss": 0.4452,
"step": 1182000
},
{
"epoch": 27.89,
"learning_rate": 2.1182575876710174e-05,
"loss": 0.4418,
"step": 1184000
},
{
"epoch": 27.93,
"learning_rate": 2.071080358546941e-05,
"loss": 0.4396,
"step": 1186000
},
{
"epoch": 27.98,
"learning_rate": 2.0239031294228653e-05,
"loss": 0.4477,
"step": 1188000
},
{
"epoch": 28.03,
"learning_rate": 1.976725900298789e-05,
"loss": 0.4435,
"step": 1190000
},
{
"epoch": 28.07,
"learning_rate": 1.9295486711747125e-05,
"loss": 0.4379,
"step": 1192000
},
{
"epoch": 28.12,
"learning_rate": 1.8823714420506368e-05,
"loss": 0.4442,
"step": 1194000
},
{
"epoch": 28.17,
"learning_rate": 1.8351942129265604e-05,
"loss": 0.4329,
"step": 1196000
},
{
"epoch": 28.21,
"learning_rate": 1.7880169838024844e-05,
"loss": 0.4368,
"step": 1198000
},
{
"epoch": 28.26,
"learning_rate": 1.7408397546784083e-05,
"loss": 0.4427,
"step": 1200000
},
{
"epoch": 28.31,
"learning_rate": 1.6936625255543323e-05,
"loss": 0.4426,
"step": 1202000
},
{
"epoch": 28.36,
"learning_rate": 1.6464852964302562e-05,
"loss": 0.4374,
"step": 1204000
},
{
"epoch": 28.4,
"learning_rate": 1.5993080673061802e-05,
"loss": 0.4372,
"step": 1206000
},
{
"epoch": 28.45,
"learning_rate": 1.552130838182104e-05,
"loss": 0.4365,
"step": 1208000
},
{
"epoch": 28.5,
"learning_rate": 1.504953609058028e-05,
"loss": 0.4329,
"step": 1210000
},
{
"epoch": 28.54,
"learning_rate": 1.4577763799339517e-05,
"loss": 0.4397,
"step": 1212000
},
{
"epoch": 28.59,
"learning_rate": 1.4105991508098757e-05,
"loss": 0.4355,
"step": 1214000
},
{
"epoch": 28.64,
"learning_rate": 1.3634219216857994e-05,
"loss": 0.4386,
"step": 1216000
},
{
"epoch": 28.69,
"learning_rate": 1.3162446925617234e-05,
"loss": 0.4369,
"step": 1218000
},
{
"epoch": 28.73,
"learning_rate": 1.2690674634376474e-05,
"loss": 0.441,
"step": 1220000
},
{
"epoch": 28.78,
"learning_rate": 1.2218902343135713e-05,
"loss": 0.4383,
"step": 1222000
},
{
"epoch": 28.83,
"learning_rate": 1.1747130051894953e-05,
"loss": 0.4338,
"step": 1224000
},
{
"epoch": 28.87,
"learning_rate": 1.1275357760654189e-05,
"loss": 0.4373,
"step": 1226000
},
{
"epoch": 28.92,
"learning_rate": 1.0803585469413428e-05,
"loss": 0.4392,
"step": 1228000
},
{
"epoch": 28.97,
"learning_rate": 1.0331813178172668e-05,
"loss": 0.4401,
"step": 1230000
},
{
"epoch": 29.02,
"learning_rate": 9.860040886931907e-06,
"loss": 0.4394,
"step": 1232000
},
{
"epoch": 29.06,
"learning_rate": 9.388268595691145e-06,
"loss": 0.4316,
"step": 1234000
},
{
"epoch": 29.11,
"learning_rate": 8.916496304450385e-06,
"loss": 0.4372,
"step": 1236000
},
{
"epoch": 29.16,
"learning_rate": 8.444724013209622e-06,
"loss": 0.4363,
"step": 1238000
},
{
"epoch": 29.2,
"learning_rate": 7.972951721968862e-06,
"loss": 0.4348,
"step": 1240000
},
{
"epoch": 29.25,
"learning_rate": 7.5011794307281015e-06,
"loss": 0.4342,
"step": 1242000
},
{
"epoch": 29.3,
"learning_rate": 7.02940713948734e-06,
"loss": 0.4295,
"step": 1244000
},
{
"epoch": 29.35,
"learning_rate": 6.557634848246579e-06,
"loss": 0.4358,
"step": 1246000
},
{
"epoch": 29.39,
"learning_rate": 6.085862557005818e-06,
"loss": 0.4317,
"step": 1248000
},
{
"epoch": 29.44,
"learning_rate": 5.614090265765056e-06,
"loss": 0.431,
"step": 1250000
},
{
"epoch": 29.49,
"learning_rate": 5.142317974524296e-06,
"loss": 0.4355,
"step": 1252000
},
{
"epoch": 29.53,
"learning_rate": 4.670545683283535e-06,
"loss": 0.4301,
"step": 1254000
},
{
"epoch": 29.58,
"learning_rate": 4.198773392042774e-06,
"loss": 0.4272,
"step": 1256000
},
{
"epoch": 29.63,
"learning_rate": 3.7270011008020125e-06,
"loss": 0.4325,
"step": 1258000
},
{
"epoch": 29.67,
"learning_rate": 3.255228809561251e-06,
"loss": 0.4288,
"step": 1260000
},
{
"epoch": 29.72,
"learning_rate": 2.7834565183204907e-06,
"loss": 0.4246,
"step": 1262000
},
{
"epoch": 29.77,
"learning_rate": 2.3116842270797294e-06,
"loss": 0.437,
"step": 1264000
},
{
"epoch": 29.82,
"learning_rate": 1.8399119358389683e-06,
"loss": 0.4337,
"step": 1266000
},
{
"epoch": 29.86,
"learning_rate": 1.3681396445982072e-06,
"loss": 0.4281,
"step": 1268000
},
{
"epoch": 29.91,
"learning_rate": 8.963673533574461e-07,
"loss": 0.4334,
"step": 1270000
},
{
"epoch": 29.96,
"learning_rate": 4.2459506211668494e-07,
"loss": 0.4294,
"step": 1272000
}
],
"logging_steps": 2000,
"max_steps": 1273800,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2.5779090018764566e+21,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}