{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.999988422039806,
"eval_steps": 500,
"global_step": 388669,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 0.0003,
"loss": 3.5282,
"step": 2000
},
{
"epoch": 0.09,
"learning_rate": 0.00029860416424334066,
"loss": 1.3191,
"step": 4000
},
{
"epoch": 0.14,
"learning_rate": 0.0002972083284866814,
"loss": 1.2226,
"step": 6000
},
{
"epoch": 0.19,
"learning_rate": 0.00029581249273002205,
"loss": 1.1832,
"step": 8000
},
{
"epoch": 0.23,
"learning_rate": 0.0002944166569733628,
"loss": 1.1595,
"step": 10000
},
{
"epoch": 0.28,
"learning_rate": 0.0002930208212167035,
"loss": 1.1368,
"step": 12000
},
{
"epoch": 0.32,
"learning_rate": 0.00029162498546004417,
"loss": 1.1176,
"step": 14000
},
{
"epoch": 0.37,
"learning_rate": 0.00029022914970338486,
"loss": 1.1044,
"step": 16000
},
{
"epoch": 0.42,
"learning_rate": 0.00028883331394672555,
"loss": 1.0954,
"step": 18000
},
{
"epoch": 0.46,
"learning_rate": 0.0002874374781900663,
"loss": 1.088,
"step": 20000
},
{
"epoch": 0.51,
"learning_rate": 0.000286041642433407,
"loss": 1.0765,
"step": 22000
},
{
"epoch": 0.56,
"learning_rate": 0.0002846458066767477,
"loss": 1.069,
"step": 24000
},
{
"epoch": 0.6,
"learning_rate": 0.00028324997092008837,
"loss": 1.0623,
"step": 26000
},
{
"epoch": 0.65,
"learning_rate": 0.00028185413516342906,
"loss": 1.0653,
"step": 28000
},
{
"epoch": 0.69,
"learning_rate": 0.0002804582994067698,
"loss": 1.0467,
"step": 30000
},
{
"epoch": 0.74,
"learning_rate": 0.00027906246365011044,
"loss": 1.0395,
"step": 32000
},
{
"epoch": 0.79,
"learning_rate": 0.0002776666278934512,
"loss": 1.0419,
"step": 34000
},
{
"epoch": 0.83,
"learning_rate": 0.0002762707921367919,
"loss": 1.0385,
"step": 36000
},
{
"epoch": 0.88,
"learning_rate": 0.00027487495638013257,
"loss": 1.0375,
"step": 38000
},
{
"epoch": 0.93,
"learning_rate": 0.00027347912062347326,
"loss": 1.0211,
"step": 40000
},
{
"epoch": 0.97,
"learning_rate": 0.00027208328486681395,
"loss": 1.0248,
"step": 42000
},
{
"epoch": 1.02,
"learning_rate": 0.0002706874491101547,
"loss": 1.0071,
"step": 44000
},
{
"epoch": 1.07,
"learning_rate": 0.0002692916133534954,
"loss": 0.9825,
"step": 46000
},
{
"epoch": 1.11,
"learning_rate": 0.0002678957775968361,
"loss": 0.983,
"step": 48000
},
{
"epoch": 1.16,
"learning_rate": 0.00026649994184017677,
"loss": 0.9831,
"step": 50000
},
{
"epoch": 1.2,
"learning_rate": 0.00026510410608351746,
"loss": 0.9792,
"step": 52000
},
{
"epoch": 1.25,
"learning_rate": 0.0002637082703268582,
"loss": 0.9744,
"step": 54000
},
{
"epoch": 1.3,
"learning_rate": 0.0002623124345701989,
"loss": 0.9668,
"step": 56000
},
{
"epoch": 1.34,
"learning_rate": 0.0002609165988135396,
"loss": 0.971,
"step": 58000
},
{
"epoch": 1.39,
"learning_rate": 0.0002595207630568803,
"loss": 0.9789,
"step": 60000
},
{
"epoch": 1.44,
"learning_rate": 0.000258124927300221,
"loss": 0.9794,
"step": 62000
},
{
"epoch": 1.48,
"learning_rate": 0.00025672909154356166,
"loss": 0.9615,
"step": 64000
},
{
"epoch": 1.53,
"learning_rate": 0.0002553332557869024,
"loss": 0.9577,
"step": 66000
},
{
"epoch": 1.57,
"learning_rate": 0.0002539374200302431,
"loss": 0.9573,
"step": 68000
},
{
"epoch": 1.62,
"learning_rate": 0.0002525415842735838,
"loss": 0.9633,
"step": 70000
},
{
"epoch": 1.67,
"learning_rate": 0.0002511457485169245,
"loss": 0.946,
"step": 72000
},
{
"epoch": 1.71,
"learning_rate": 0.00024974991276026517,
"loss": 0.9528,
"step": 74000
},
{
"epoch": 1.76,
"learning_rate": 0.0002483540770036059,
"loss": 0.9474,
"step": 76000
},
{
"epoch": 1.81,
"learning_rate": 0.0002469582412469466,
"loss": 0.9431,
"step": 78000
},
{
"epoch": 1.85,
"learning_rate": 0.0002455624054902873,
"loss": 0.9469,
"step": 80000
},
{
"epoch": 1.9,
"learning_rate": 0.000244166569733628,
"loss": 0.9442,
"step": 82000
},
{
"epoch": 1.95,
"learning_rate": 0.0002427707339769687,
"loss": 0.9387,
"step": 84000
},
{
"epoch": 1.99,
"learning_rate": 0.0002413748982203094,
"loss": 0.9341,
"step": 86000
},
{
"epoch": 2.04,
"learning_rate": 0.00023997906246365008,
"loss": 0.9124,
"step": 88000
},
{
"epoch": 2.08,
"learning_rate": 0.00023858322670699077,
"loss": 0.8962,
"step": 90000
},
{
"epoch": 2.13,
"learning_rate": 0.0002371873909503315,
"loss": 0.8974,
"step": 92000
},
{
"epoch": 2.18,
"learning_rate": 0.0002357915551936722,
"loss": 0.9122,
"step": 94000
},
{
"epoch": 2.22,
"learning_rate": 0.00023439571943701287,
"loss": 0.8977,
"step": 96000
},
{
"epoch": 2.27,
"learning_rate": 0.0002329998836803536,
"loss": 0.8972,
"step": 98000
},
{
"epoch": 2.32,
"learning_rate": 0.00023160404792369428,
"loss": 0.8969,
"step": 100000
},
{
"epoch": 2.36,
"learning_rate": 0.000230208212167035,
"loss": 0.9021,
"step": 102000
},
{
"epoch": 2.41,
"learning_rate": 0.00022881237641037572,
"loss": 0.9083,
"step": 104000
},
{
"epoch": 2.45,
"learning_rate": 0.00022741654065371638,
"loss": 0.8928,
"step": 106000
},
{
"epoch": 2.5,
"learning_rate": 0.0002260207048970571,
"loss": 0.8871,
"step": 108000
},
{
"epoch": 2.55,
"learning_rate": 0.0002246248691403978,
"loss": 0.9017,
"step": 110000
},
{
"epoch": 2.59,
"learning_rate": 0.0002232290333837385,
"loss": 0.8968,
"step": 112000
},
{
"epoch": 2.64,
"learning_rate": 0.00022183319762707917,
"loss": 0.8813,
"step": 114000
},
{
"epoch": 2.69,
"learning_rate": 0.0002204373618704199,
"loss": 0.8807,
"step": 116000
},
{
"epoch": 2.73,
"learning_rate": 0.0002190415261137606,
"loss": 0.8816,
"step": 118000
},
{
"epoch": 2.78,
"learning_rate": 0.0002176456903571013,
"loss": 0.8776,
"step": 120000
},
{
"epoch": 2.83,
"learning_rate": 0.000216249854600442,
"loss": 0.8738,
"step": 122000
},
{
"epoch": 2.87,
"learning_rate": 0.00021485401884378268,
"loss": 0.8826,
"step": 124000
},
{
"epoch": 2.92,
"learning_rate": 0.0002134581830871234,
"loss": 0.893,
"step": 126000
},
{
"epoch": 2.96,
"learning_rate": 0.00021206234733046412,
"loss": 0.8753,
"step": 128000
},
{
"epoch": 3.01,
"learning_rate": 0.00021066651157380478,
"loss": 0.8671,
"step": 130000
},
{
"epoch": 3.06,
"learning_rate": 0.0002092706758171455,
"loss": 0.8497,
"step": 132000
},
{
"epoch": 3.1,
"learning_rate": 0.00020787484006048622,
"loss": 0.8443,
"step": 134000
},
{
"epoch": 3.15,
"learning_rate": 0.0002064790043038269,
"loss": 0.8477,
"step": 136000
},
{
"epoch": 3.2,
"learning_rate": 0.0002050831685471676,
"loss": 0.8465,
"step": 138000
},
{
"epoch": 3.24,
"learning_rate": 0.0002036873327905083,
"loss": 0.8501,
"step": 140000
},
{
"epoch": 3.29,
"learning_rate": 0.000202291497033849,
"loss": 0.8451,
"step": 142000
},
{
"epoch": 3.33,
"learning_rate": 0.00020089566127718972,
"loss": 0.84,
"step": 144000
},
{
"epoch": 3.38,
"learning_rate": 0.0001994998255205304,
"loss": 0.8482,
"step": 146000
},
{
"epoch": 3.43,
"learning_rate": 0.0001981039897638711,
"loss": 0.8383,
"step": 148000
},
{
"epoch": 3.47,
"learning_rate": 0.0001967081540072118,
"loss": 0.8353,
"step": 150000
},
{
"epoch": 3.52,
"learning_rate": 0.00019531231825055251,
"loss": 0.8334,
"step": 152000
},
{
"epoch": 3.57,
"learning_rate": 0.00019391648249389318,
"loss": 0.8446,
"step": 154000
},
{
"epoch": 3.61,
"learning_rate": 0.0001925206467372339,
"loss": 0.8373,
"step": 156000
},
{
"epoch": 3.66,
"learning_rate": 0.00019112481098057461,
"loss": 0.8412,
"step": 158000
},
{
"epoch": 3.7,
"learning_rate": 0.0001897289752239153,
"loss": 0.8286,
"step": 160000
},
{
"epoch": 3.75,
"learning_rate": 0.000188333139467256,
"loss": 0.8327,
"step": 162000
},
{
"epoch": 3.8,
"learning_rate": 0.0001869373037105967,
"loss": 0.8426,
"step": 164000
},
{
"epoch": 3.84,
"learning_rate": 0.0001855414679539374,
"loss": 0.8291,
"step": 166000
},
{
"epoch": 3.89,
"learning_rate": 0.00018414563219727812,
"loss": 0.8271,
"step": 168000
},
{
"epoch": 3.94,
"learning_rate": 0.00018274979644061879,
"loss": 0.8278,
"step": 170000
},
{
"epoch": 3.98,
"learning_rate": 0.0001813539606839595,
"loss": 0.8307,
"step": 172000
},
{
"epoch": 4.03,
"learning_rate": 0.0001799581249273002,
"loss": 0.8109,
"step": 174000
},
{
"epoch": 4.08,
"learning_rate": 0.0001785622891706409,
"loss": 0.8027,
"step": 176000
},
{
"epoch": 4.12,
"learning_rate": 0.0001771664534139816,
"loss": 0.7955,
"step": 178000
},
{
"epoch": 4.17,
"learning_rate": 0.0001757706176573223,
"loss": 0.7888,
"step": 180000
},
{
"epoch": 4.21,
"learning_rate": 0.000174374781900663,
"loss": 0.799,
"step": 182000
},
{
"epoch": 4.26,
"learning_rate": 0.0001729789461440037,
"loss": 0.7949,
"step": 184000
},
{
"epoch": 4.31,
"learning_rate": 0.0001715831103873444,
"loss": 0.7913,
"step": 186000
},
{
"epoch": 4.35,
"learning_rate": 0.0001701872746306851,
"loss": 0.7862,
"step": 188000
},
{
"epoch": 4.4,
"learning_rate": 0.0001687914388740258,
"loss": 0.7882,
"step": 190000
},
{
"epoch": 4.45,
"learning_rate": 0.00016739560311736652,
"loss": 0.8017,
"step": 192000
},
{
"epoch": 4.49,
"learning_rate": 0.00016599976736070724,
"loss": 0.787,
"step": 194000
},
{
"epoch": 4.54,
"learning_rate": 0.0001646039316040479,
"loss": 0.7975,
"step": 196000
},
{
"epoch": 4.58,
"learning_rate": 0.00016320809584738862,
"loss": 0.7938,
"step": 198000
},
{
"epoch": 4.63,
"learning_rate": 0.0001618122600907293,
"loss": 0.799,
"step": 200000
},
{
"epoch": 4.68,
"learning_rate": 0.00016041642433407003,
"loss": 0.7811,
"step": 202000
},
{
"epoch": 4.72,
"learning_rate": 0.0001590205885774107,
"loss": 0.7941,
"step": 204000
},
{
"epoch": 4.77,
"learning_rate": 0.0001576247528207514,
"loss": 0.7918,
"step": 206000
},
{
"epoch": 4.82,
"learning_rate": 0.00015622891706409213,
"loss": 0.8006,
"step": 208000
},
{
"epoch": 4.86,
"learning_rate": 0.00015483308130743282,
"loss": 0.7896,
"step": 210000
},
{
"epoch": 4.91,
"learning_rate": 0.0001534372455507735,
"loss": 0.7944,
"step": 212000
},
{
"epoch": 4.96,
"learning_rate": 0.0001520414097941142,
"loss": 0.7869,
"step": 214000
},
{
"epoch": 5.0,
"learning_rate": 0.00015064557403745492,
"loss": 0.7847,
"step": 216000
},
{
"epoch": 5.05,
"learning_rate": 0.0001492497382807956,
"loss": 0.7577,
"step": 218000
},
{
"epoch": 5.09,
"learning_rate": 0.00014785390252413633,
"loss": 0.7527,
"step": 220000
},
{
"epoch": 5.14,
"learning_rate": 0.00014645806676747702,
"loss": 0.7554,
"step": 222000
},
{
"epoch": 5.19,
"learning_rate": 0.0001450622310108177,
"loss": 0.7633,
"step": 224000
},
{
"epoch": 5.23,
"learning_rate": 0.0001436663952541584,
"loss": 0.754,
"step": 226000
},
{
"epoch": 5.28,
"learning_rate": 0.00014227055949749912,
"loss": 0.7438,
"step": 228000
},
{
"epoch": 5.33,
"learning_rate": 0.0001408747237408398,
"loss": 0.7514,
"step": 230000
},
{
"epoch": 5.37,
"learning_rate": 0.00013947888798418053,
"loss": 0.7522,
"step": 232000
},
{
"epoch": 5.42,
"learning_rate": 0.00013808305222752122,
"loss": 0.7525,
"step": 234000
},
{
"epoch": 5.46,
"learning_rate": 0.00013668721647086193,
"loss": 0.7476,
"step": 236000
},
{
"epoch": 5.51,
"learning_rate": 0.00013529138071420263,
"loss": 0.747,
"step": 238000
},
{
"epoch": 5.56,
"learning_rate": 0.00013389554495754332,
"loss": 0.7505,
"step": 240000
},
{
"epoch": 5.6,
"learning_rate": 0.000132499709200884,
"loss": 0.7501,
"step": 242000
},
{
"epoch": 5.65,
"learning_rate": 0.00013110387344422472,
"loss": 0.7568,
"step": 244000
},
{
"epoch": 5.7,
"learning_rate": 0.00012970803768756542,
"loss": 0.7383,
"step": 246000
},
{
"epoch": 5.74,
"learning_rate": 0.00012831220193090613,
"loss": 0.7535,
"step": 248000
},
{
"epoch": 5.79,
"learning_rate": 0.00012691636617424682,
"loss": 0.751,
"step": 250000
},
{
"epoch": 5.84,
"learning_rate": 0.00012552053041758751,
"loss": 0.7396,
"step": 252000
},
{
"epoch": 5.88,
"learning_rate": 0.0001241246946609282,
"loss": 0.7448,
"step": 254000
},
{
"epoch": 5.93,
"learning_rate": 0.00012272885890426892,
"loss": 0.7521,
"step": 256000
},
{
"epoch": 5.97,
"learning_rate": 0.00012133302314760961,
"loss": 0.7472,
"step": 258000
},
{
"epoch": 6.02,
"learning_rate": 0.00011993718739095032,
"loss": 0.7344,
"step": 260000
},
{
"epoch": 6.07,
"learning_rate": 0.00011854135163429101,
"loss": 0.7143,
"step": 262000
},
{
"epoch": 6.11,
"learning_rate": 0.00011714551587763173,
"loss": 0.717,
"step": 264000
},
{
"epoch": 6.16,
"learning_rate": 0.00011574968012097242,
"loss": 0.7194,
"step": 266000
},
{
"epoch": 6.21,
"learning_rate": 0.00011435384436431312,
"loss": 0.7206,
"step": 268000
},
{
"epoch": 6.25,
"learning_rate": 0.00011295800860765381,
"loss": 0.7148,
"step": 270000
},
{
"epoch": 6.3,
"learning_rate": 0.00011156217285099453,
"loss": 0.7137,
"step": 272000
},
{
"epoch": 6.34,
"learning_rate": 0.00011016633709433522,
"loss": 0.7168,
"step": 274000
},
{
"epoch": 6.39,
"learning_rate": 0.00010877050133767593,
"loss": 0.7097,
"step": 276000
},
{
"epoch": 6.44,
"learning_rate": 0.00010737466558101662,
"loss": 0.7084,
"step": 278000
},
{
"epoch": 6.48,
"learning_rate": 0.00010597882982435732,
"loss": 0.7101,
"step": 280000
},
{
"epoch": 6.53,
"learning_rate": 0.00010458299406769801,
"loss": 0.7117,
"step": 282000
},
{
"epoch": 6.58,
"learning_rate": 0.00010318715831103873,
"loss": 0.7067,
"step": 284000
},
{
"epoch": 6.62,
"learning_rate": 0.00010179132255437942,
"loss": 0.7026,
"step": 286000
},
{
"epoch": 6.67,
"learning_rate": 0.00010039548679772013,
"loss": 0.7144,
"step": 288000
},
{
"epoch": 6.72,
"learning_rate": 9.899965104106083e-05,
"loss": 0.7118,
"step": 290000
},
{
"epoch": 6.76,
"learning_rate": 9.760381528440152e-05,
"loss": 0.7028,
"step": 292000
},
{
"epoch": 6.81,
"learning_rate": 9.620797952774224e-05,
"loss": 0.7044,
"step": 294000
},
{
"epoch": 6.85,
"learning_rate": 9.481214377108293e-05,
"loss": 0.704,
"step": 296000
},
{
"epoch": 6.9,
"learning_rate": 9.341630801442363e-05,
"loss": 0.7049,
"step": 298000
},
{
"epoch": 6.95,
"learning_rate": 9.202047225776432e-05,
"loss": 0.6937,
"step": 300000
},
{
"epoch": 6.99,
"learning_rate": 9.062463650110504e-05,
"loss": 0.7096,
"step": 302000
},
{
"epoch": 7.04,
"learning_rate": 8.922880074444573e-05,
"loss": 0.6749,
"step": 304000
},
{
"epoch": 7.09,
"learning_rate": 8.783296498778644e-05,
"loss": 0.6718,
"step": 306000
},
{
"epoch": 7.13,
"learning_rate": 8.643712923112713e-05,
"loss": 0.6789,
"step": 308000
},
{
"epoch": 7.18,
"learning_rate": 8.504129347446783e-05,
"loss": 0.6805,
"step": 310000
},
{
"epoch": 7.22,
"learning_rate": 8.364545771780852e-05,
"loss": 0.6776,
"step": 312000
},
{
"epoch": 7.27,
"learning_rate": 8.224962196114924e-05,
"loss": 0.6734,
"step": 314000
},
{
"epoch": 7.32,
"learning_rate": 8.085378620448993e-05,
"loss": 0.6765,
"step": 316000
},
{
"epoch": 7.36,
"learning_rate": 7.945795044783064e-05,
"loss": 0.6709,
"step": 318000
},
{
"epoch": 7.41,
"learning_rate": 7.806211469117133e-05,
"loss": 0.6804,
"step": 320000
},
{
"epoch": 7.46,
"learning_rate": 7.666627893451203e-05,
"loss": 0.6688,
"step": 322000
},
{
"epoch": 7.5,
"learning_rate": 7.527044317785274e-05,
"loss": 0.6789,
"step": 324000
},
{
"epoch": 7.55,
"learning_rate": 7.387460742119343e-05,
"loss": 0.6699,
"step": 326000
},
{
"epoch": 7.6,
"learning_rate": 7.247877166453413e-05,
"loss": 0.662,
"step": 328000
},
{
"epoch": 7.64,
"learning_rate": 7.108293590787484e-05,
"loss": 0.6657,
"step": 330000
},
{
"epoch": 7.69,
"learning_rate": 6.968710015121553e-05,
"loss": 0.6676,
"step": 332000
},
{
"epoch": 7.73,
"learning_rate": 6.829126439455623e-05,
"loss": 0.6698,
"step": 334000
},
{
"epoch": 7.78,
"learning_rate": 6.689542863789693e-05,
"loss": 0.6755,
"step": 336000
},
{
"epoch": 7.83,
"learning_rate": 6.549959288123764e-05,
"loss": 0.672,
"step": 338000
},
{
"epoch": 7.87,
"learning_rate": 6.410375712457834e-05,
"loss": 0.6616,
"step": 340000
},
{
"epoch": 7.92,
"learning_rate": 6.270792136791903e-05,
"loss": 0.6654,
"step": 342000
},
{
"epoch": 7.97,
"learning_rate": 6.131208561125974e-05,
"loss": 0.6606,
"step": 344000
},
{
"epoch": 8.01,
"learning_rate": 5.991624985460044e-05,
"loss": 0.6642,
"step": 346000
},
{
"epoch": 8.06,
"learning_rate": 5.852041409794114e-05,
"loss": 0.639,
"step": 348000
},
{
"epoch": 8.1,
"learning_rate": 5.712457834128184e-05,
"loss": 0.6463,
"step": 350000
},
{
"epoch": 8.15,
"learning_rate": 5.572874258462254e-05,
"loss": 0.6426,
"step": 352000
},
{
"epoch": 8.2,
"learning_rate": 5.433290682796324e-05,
"loss": 0.6356,
"step": 354000
},
{
"epoch": 8.24,
"learning_rate": 5.2937071071303944e-05,
"loss": 0.6336,
"step": 356000
},
{
"epoch": 8.29,
"learning_rate": 5.154123531464464e-05,
"loss": 0.631,
"step": 358000
},
{
"epoch": 8.34,
"learning_rate": 5.014539955798534e-05,
"loss": 0.6425,
"step": 360000
},
{
"epoch": 8.38,
"learning_rate": 4.8749563801326044e-05,
"loss": 0.6432,
"step": 362000
},
{
"epoch": 8.43,
"learning_rate": 4.735372804466674e-05,
"loss": 0.6449,
"step": 364000
},
{
"epoch": 8.48,
"learning_rate": 4.595789228800744e-05,
"loss": 0.635,
"step": 366000
},
{
"epoch": 8.52,
"learning_rate": 4.456205653134814e-05,
"loss": 0.6352,
"step": 368000
},
{
"epoch": 8.57,
"learning_rate": 4.316622077468884e-05,
"loss": 0.6281,
"step": 370000
},
{
"epoch": 8.61,
"learning_rate": 4.1770385018029545e-05,
"loss": 0.6291,
"step": 372000
},
{
"epoch": 8.66,
"learning_rate": 4.037454926137024e-05,
"loss": 0.6317,
"step": 374000
},
{
"epoch": 8.71,
"learning_rate": 3.897871350471094e-05,
"loss": 0.6296,
"step": 376000
},
{
"epoch": 8.75,
"learning_rate": 3.7582877748051644e-05,
"loss": 0.6279,
"step": 378000
},
{
"epoch": 8.8,
"learning_rate": 3.618704199139234e-05,
"loss": 0.6379,
"step": 380000
},
{
"epoch": 8.85,
"learning_rate": 3.4791206234733046e-05,
"loss": 0.6292,
"step": 382000
},
{
"epoch": 8.89,
"learning_rate": 3.3395370478073744e-05,
"loss": 0.6248,
"step": 384000
},
{
"epoch": 8.94,
"learning_rate": 3.199953472141444e-05,
"loss": 0.6199,
"step": 386000
},
{
"epoch": 8.98,
"learning_rate": 3.0603698964755146e-05,
"loss": 0.6173,
"step": 388000
}
],
"logging_steps": 2000,
"max_steps": 431850,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 7.94798748892796e+20,
"trial_name": null,
"trial_params": null
}