yesj1234's picture
Upload folder using huggingface_hub
3c198ad
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.0,
"eval_steps": 500,
"global_step": 97538,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 0.00014849999999999998,
"loss": 16.3517,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 0.0002985,
"loss": 4.5739,
"step": 1000
},
{
"epoch": 0.22,
"learning_rate": 0.00029856528670112555,
"loss": 2.3946,
"step": 1500
},
{
"epoch": 0.29,
"learning_rate": 0.00029711608134872706,
"loss": 1.7079,
"step": 2000
},
{
"epoch": 0.36,
"learning_rate": 0.0002956668759963287,
"loss": 1.5216,
"step": 2500
},
{
"epoch": 0.43,
"learning_rate": 0.0002942176706439302,
"loss": 1.4303,
"step": 3000
},
{
"epoch": 0.5,
"learning_rate": 0.00029277136370223656,
"loss": 1.343,
"step": 3500
},
{
"epoch": 0.57,
"learning_rate": 0.0002913221583498382,
"loss": 1.2865,
"step": 4000
},
{
"epoch": 0.65,
"learning_rate": 0.0002898729529974397,
"loss": 1.2574,
"step": 4500
},
{
"epoch": 0.72,
"learning_rate": 0.00028842374764504125,
"loss": 1.218,
"step": 5000
},
{
"epoch": 0.79,
"learning_rate": 0.0002869745422926428,
"loss": 1.1774,
"step": 5500
},
{
"epoch": 0.86,
"learning_rate": 0.0002855253369402444,
"loss": 1.1657,
"step": 6000
},
{
"epoch": 0.93,
"learning_rate": 0.00028407613158784595,
"loss": 1.1279,
"step": 6500
},
{
"epoch": 1.0,
"learning_rate": 0.0002826269262354475,
"loss": 1.1115,
"step": 7000
},
{
"epoch": 1.08,
"learning_rate": 0.00028117772088304913,
"loss": 1.0697,
"step": 7500
},
{
"epoch": 1.15,
"learning_rate": 0.00027972851553065064,
"loss": 1.047,
"step": 8000
},
{
"epoch": 1.22,
"learning_rate": 0.00027827931017825226,
"loss": 1.0408,
"step": 8500
},
{
"epoch": 1.29,
"learning_rate": 0.00027683010482585377,
"loss": 1.0267,
"step": 9000
},
{
"epoch": 1.36,
"learning_rate": 0.0002753808994734554,
"loss": 1.0195,
"step": 9500
},
{
"epoch": 1.44,
"learning_rate": 0.0002739374909424665,
"loss": 1.0126,
"step": 10000
},
{
"epoch": 1.51,
"learning_rate": 0.00027248828559006807,
"loss": 1.01,
"step": 10500
},
{
"epoch": 1.58,
"learning_rate": 0.0002710390802376697,
"loss": 0.9987,
"step": 11000
},
{
"epoch": 1.65,
"learning_rate": 0.0002695898748852712,
"loss": 0.9852,
"step": 11500
},
{
"epoch": 1.72,
"learning_rate": 0.0002681406695328728,
"loss": 0.9706,
"step": 12000
},
{
"epoch": 1.79,
"learning_rate": 0.00026669146418047433,
"loss": 0.9753,
"step": 12500
},
{
"epoch": 1.87,
"learning_rate": 0.00026524225882807595,
"loss": 0.9916,
"step": 13000
},
{
"epoch": 1.94,
"learning_rate": 0.00026379305347567746,
"loss": 0.9812,
"step": 13500
},
{
"epoch": 2.01,
"learning_rate": 0.0002623467465339838,
"loss": 0.9579,
"step": 14000
},
{
"epoch": 2.08,
"learning_rate": 0.0002608975411815854,
"loss": 0.9153,
"step": 14500
},
{
"epoch": 2.15,
"learning_rate": 0.00025944833582918696,
"loss": 0.9142,
"step": 15000
},
{
"epoch": 2.22,
"learning_rate": 0.0002579991304767885,
"loss": 0.9152,
"step": 15500
},
{
"epoch": 2.3,
"learning_rate": 0.0002565499251243901,
"loss": 0.8964,
"step": 16000
},
{
"epoch": 2.37,
"learning_rate": 0.00025510361818269645,
"loss": 0.9001,
"step": 16500
},
{
"epoch": 2.44,
"learning_rate": 0.000253654412830298,
"loss": 0.8901,
"step": 17000
},
{
"epoch": 2.51,
"learning_rate": 0.0002522081058886044,
"loss": 0.8914,
"step": 17500
},
{
"epoch": 2.58,
"learning_rate": 0.00025075890053620595,
"loss": 0.8872,
"step": 18000
},
{
"epoch": 2.66,
"learning_rate": 0.0002493096951838075,
"loss": 0.8726,
"step": 18500
},
{
"epoch": 2.73,
"learning_rate": 0.0002478604898314091,
"loss": 0.8719,
"step": 19000
},
{
"epoch": 2.8,
"learning_rate": 0.00024641418288971545,
"loss": 0.8746,
"step": 19500
},
{
"epoch": 2.87,
"learning_rate": 0.000244964977537317,
"loss": 0.8657,
"step": 20000
},
{
"epoch": 2.94,
"learning_rate": 0.00024351577218491858,
"loss": 0.8712,
"step": 20500
},
{
"epoch": 3.01,
"learning_rate": 0.00024206656683252017,
"loss": 0.8406,
"step": 21000
},
{
"epoch": 3.09,
"learning_rate": 0.0002406173614801217,
"loss": 0.8118,
"step": 21500
},
{
"epoch": 3.16,
"learning_rate": 0.0002391681561277233,
"loss": 0.8327,
"step": 22000
},
{
"epoch": 3.23,
"learning_rate": 0.00023771895077532483,
"loss": 0.8094,
"step": 22500
},
{
"epoch": 3.3,
"learning_rate": 0.0002362726438336312,
"loss": 0.8134,
"step": 23000
},
{
"epoch": 3.37,
"learning_rate": 0.00023482343848123277,
"loss": 0.7993,
"step": 23500
},
{
"epoch": 3.44,
"learning_rate": 0.00023337423312883433,
"loss": 0.8074,
"step": 24000
},
{
"epoch": 3.52,
"learning_rate": 0.0002319250277764359,
"loss": 0.7971,
"step": 24500
},
{
"epoch": 3.59,
"learning_rate": 0.0002304758224240375,
"loss": 0.7938,
"step": 25000
},
{
"epoch": 3.66,
"learning_rate": 0.00022902661707163902,
"loss": 0.8041,
"step": 25500
},
{
"epoch": 3.73,
"learning_rate": 0.0002275803101299454,
"loss": 0.7918,
"step": 26000
},
{
"epoch": 3.8,
"learning_rate": 0.00022613110477754696,
"loss": 0.7848,
"step": 26500
},
{
"epoch": 3.88,
"learning_rate": 0.00022468189942514852,
"loss": 0.7887,
"step": 27000
},
{
"epoch": 3.95,
"learning_rate": 0.00022323559248345486,
"loss": 0.7707,
"step": 27500
},
{
"epoch": 4.02,
"learning_rate": 0.00022178638713105645,
"loss": 0.7574,
"step": 28000
},
{
"epoch": 4.09,
"learning_rate": 0.000220337181778658,
"loss": 0.7468,
"step": 28500
},
{
"epoch": 4.16,
"learning_rate": 0.00021888797642625958,
"loss": 0.7387,
"step": 29000
},
{
"epoch": 4.23,
"learning_rate": 0.00021743877107386117,
"loss": 0.7381,
"step": 29500
},
{
"epoch": 4.31,
"learning_rate": 0.00021599246413216752,
"loss": 0.733,
"step": 30000
},
{
"epoch": 4.38,
"learning_rate": 0.00021454325877976908,
"loss": 0.7271,
"step": 30500
},
{
"epoch": 4.45,
"learning_rate": 0.00021309405342737064,
"loss": 0.7362,
"step": 31000
},
{
"epoch": 4.52,
"learning_rate": 0.00021164484807497218,
"loss": 0.7337,
"step": 31500
},
{
"epoch": 4.59,
"learning_rate": 0.00021019564272257377,
"loss": 0.727,
"step": 32000
},
{
"epoch": 4.66,
"learning_rate": 0.0002087464373701753,
"loss": 0.7262,
"step": 32500
},
{
"epoch": 4.74,
"learning_rate": 0.0002072972320177769,
"loss": 0.7257,
"step": 33000
},
{
"epoch": 4.81,
"learning_rate": 0.00020584802666537844,
"loss": 0.7228,
"step": 33500
},
{
"epoch": 4.88,
"learning_rate": 0.00020439882131298003,
"loss": 0.7281,
"step": 34000
},
{
"epoch": 4.95,
"learning_rate": 0.0002029525143712864,
"loss": 0.7263,
"step": 34500
},
{
"epoch": 5.02,
"learning_rate": 0.00020150330901888796,
"loss": 0.7053,
"step": 35000
},
{
"epoch": 5.1,
"learning_rate": 0.00020005410366648953,
"loss": 0.681,
"step": 35500
},
{
"epoch": 5.17,
"learning_rate": 0.00019860779672479587,
"loss": 0.6896,
"step": 36000
},
{
"epoch": 5.24,
"learning_rate": 0.00019715859137239746,
"loss": 0.6886,
"step": 36500
},
{
"epoch": 5.31,
"learning_rate": 0.000195709386019999,
"loss": 0.6883,
"step": 37000
},
{
"epoch": 5.38,
"learning_rate": 0.0001942601806676006,
"loss": 0.6835,
"step": 37500
},
{
"epoch": 5.45,
"learning_rate": 0.00019281097531520213,
"loss": 0.694,
"step": 38000
},
{
"epoch": 5.53,
"learning_rate": 0.00019136176996280372,
"loss": 0.6928,
"step": 38500
},
{
"epoch": 5.6,
"learning_rate": 0.00018991256461040528,
"loss": 0.689,
"step": 39000
},
{
"epoch": 5.67,
"learning_rate": 0.00018846625766871165,
"loss": 0.6876,
"step": 39500
},
{
"epoch": 5.74,
"learning_rate": 0.0001870170523163132,
"loss": 0.6777,
"step": 40000
},
{
"epoch": 5.81,
"learning_rate": 0.00018556784696391478,
"loss": 0.6777,
"step": 40500
},
{
"epoch": 5.88,
"learning_rate": 0.00018411864161151632,
"loss": 0.6744,
"step": 41000
},
{
"epoch": 5.96,
"learning_rate": 0.0001826694362591179,
"loss": 0.667,
"step": 41500
},
{
"epoch": 6.03,
"learning_rate": 0.00018122023090671945,
"loss": 0.6599,
"step": 42000
},
{
"epoch": 6.1,
"learning_rate": 0.00017977102555432104,
"loss": 0.63,
"step": 42500
},
{
"epoch": 6.17,
"learning_rate": 0.00017832182020192258,
"loss": 0.6447,
"step": 43000
},
{
"epoch": 6.24,
"learning_rate": 0.00017687261484952417,
"loss": 0.6461,
"step": 43500
},
{
"epoch": 6.32,
"learning_rate": 0.0001754234094971257,
"loss": 0.6407,
"step": 44000
},
{
"epoch": 6.39,
"learning_rate": 0.0001739771025554321,
"loss": 0.6391,
"step": 44500
},
{
"epoch": 6.46,
"learning_rate": 0.00017252789720303364,
"loss": 0.6352,
"step": 45000
},
{
"epoch": 6.53,
"learning_rate": 0.00017107869185063523,
"loss": 0.6365,
"step": 45500
},
{
"epoch": 6.6,
"learning_rate": 0.00016962948649823677,
"loss": 0.6407,
"step": 46000
},
{
"epoch": 6.67,
"learning_rate": 0.00016818028114583836,
"loss": 0.6432,
"step": 46500
},
{
"epoch": 6.75,
"learning_rate": 0.0001667310757934399,
"loss": 0.6268,
"step": 47000
},
{
"epoch": 6.82,
"learning_rate": 0.0001652818704410415,
"loss": 0.6371,
"step": 47500
},
{
"epoch": 6.89,
"learning_rate": 0.00016383266508864303,
"loss": 0.6351,
"step": 48000
},
{
"epoch": 6.96,
"learning_rate": 0.00016238635814694942,
"loss": 0.6332,
"step": 48500
},
{
"epoch": 7.03,
"learning_rate": 0.0001609400512052558,
"loss": 0.6173,
"step": 49000
},
{
"epoch": 7.1,
"learning_rate": 0.00015949084585285733,
"loss": 0.5989,
"step": 49500
},
{
"epoch": 7.18,
"learning_rate": 0.00015804164050045892,
"loss": 0.5929,
"step": 50000
},
{
"epoch": 7.25,
"learning_rate": 0.00015659243514806046,
"loss": 0.6017,
"step": 50500
},
{
"epoch": 7.32,
"learning_rate": 0.00015514322979566205,
"loss": 0.5986,
"step": 51000
},
{
"epoch": 7.39,
"learning_rate": 0.00015369402444326358,
"loss": 0.5993,
"step": 51500
},
{
"epoch": 7.46,
"learning_rate": 0.00015224481909086518,
"loss": 0.5997,
"step": 52000
},
{
"epoch": 7.54,
"learning_rate": 0.00015079561373846671,
"loss": 0.5846,
"step": 52500
},
{
"epoch": 7.61,
"learning_rate": 0.0001493464083860683,
"loss": 0.6038,
"step": 53000
},
{
"epoch": 7.68,
"learning_rate": 0.00014789720303366987,
"loss": 0.5991,
"step": 53500
},
{
"epoch": 7.75,
"learning_rate": 0.00014644799768127143,
"loss": 0.5947,
"step": 54000
},
{
"epoch": 7.82,
"learning_rate": 0.000144998792328873,
"loss": 0.5889,
"step": 54500
},
{
"epoch": 7.89,
"learning_rate": 0.00014354958697647456,
"loss": 0.5855,
"step": 55000
},
{
"epoch": 7.97,
"learning_rate": 0.0001421032800347809,
"loss": 0.5902,
"step": 55500
},
{
"epoch": 8.04,
"learning_rate": 0.00014065697309308727,
"loss": 0.5805,
"step": 56000
},
{
"epoch": 8.11,
"learning_rate": 0.00013920776774068884,
"loss": 0.5627,
"step": 56500
},
{
"epoch": 8.18,
"learning_rate": 0.0001377585623882904,
"loss": 0.5566,
"step": 57000
},
{
"epoch": 8.25,
"learning_rate": 0.00013630935703589197,
"loss": 0.5597,
"step": 57500
},
{
"epoch": 8.32,
"learning_rate": 0.00013486015168349353,
"loss": 0.5553,
"step": 58000
},
{
"epoch": 8.4,
"learning_rate": 0.0001334167431525047,
"loss": 0.5587,
"step": 58500
},
{
"epoch": 8.47,
"learning_rate": 0.00013196753780010627,
"loss": 0.5561,
"step": 59000
},
{
"epoch": 8.54,
"learning_rate": 0.00013051833244770783,
"loss": 0.5624,
"step": 59500
},
{
"epoch": 8.61,
"learning_rate": 0.0001290691270953094,
"loss": 0.5525,
"step": 60000
},
{
"epoch": 8.68,
"learning_rate": 0.00012761992174291096,
"loss": 0.5619,
"step": 60500
},
{
"epoch": 8.76,
"learning_rate": 0.00012617071639051252,
"loss": 0.5552,
"step": 61000
},
{
"epoch": 8.83,
"learning_rate": 0.0001247215110381141,
"loss": 0.5523,
"step": 61500
},
{
"epoch": 8.9,
"learning_rate": 0.00012327230568571565,
"loss": 0.5527,
"step": 62000
},
{
"epoch": 8.97,
"learning_rate": 0.00012182310033331722,
"loss": 0.553,
"step": 62500
},
{
"epoch": 9.04,
"learning_rate": 0.00012037679339162357,
"loss": 0.5306,
"step": 63000
},
{
"epoch": 9.11,
"learning_rate": 0.00011892758803922514,
"loss": 0.5233,
"step": 63500
},
{
"epoch": 9.19,
"learning_rate": 0.0001174783826868267,
"loss": 0.5203,
"step": 64000
},
{
"epoch": 9.26,
"learning_rate": 0.00011602917733442828,
"loss": 0.531,
"step": 64500
},
{
"epoch": 9.33,
"learning_rate": 0.00011458287039273465,
"loss": 0.5203,
"step": 65000
},
{
"epoch": 9.4,
"learning_rate": 0.00011313366504033621,
"loss": 0.5253,
"step": 65500
},
{
"epoch": 9.47,
"learning_rate": 0.00011168445968793778,
"loss": 0.5311,
"step": 66000
},
{
"epoch": 9.54,
"learning_rate": 0.00011023525433553934,
"loss": 0.5246,
"step": 66500
},
{
"epoch": 9.62,
"learning_rate": 0.0001087860489831409,
"loss": 0.5228,
"step": 67000
},
{
"epoch": 9.69,
"learning_rate": 0.00010733684363074247,
"loss": 0.5264,
"step": 67500
},
{
"epoch": 9.76,
"learning_rate": 0.00010589053668904882,
"loss": 0.529,
"step": 68000
},
{
"epoch": 9.83,
"learning_rate": 0.0001044413313366504,
"loss": 0.5248,
"step": 68500
},
{
"epoch": 9.9,
"learning_rate": 0.00010299502439495676,
"loss": 0.523,
"step": 69000
},
{
"epoch": 9.98,
"learning_rate": 0.00010154581904255832,
"loss": 0.5269,
"step": 69500
},
{
"epoch": 10.05,
"learning_rate": 0.00010009661369015989,
"loss": 0.5044,
"step": 70000
},
{
"epoch": 10.12,
"learning_rate": 9.865030674846624e-05,
"loss": 0.4907,
"step": 70500
},
{
"epoch": 10.19,
"learning_rate": 9.72011013960678e-05,
"loss": 0.4982,
"step": 71000
},
{
"epoch": 10.26,
"learning_rate": 9.575189604366937e-05,
"loss": 0.4995,
"step": 71500
},
{
"epoch": 10.33,
"learning_rate": 9.430269069127093e-05,
"loss": 0.494,
"step": 72000
},
{
"epoch": 10.41,
"learning_rate": 9.28534853388725e-05,
"loss": 0.4928,
"step": 72500
},
{
"epoch": 10.48,
"learning_rate": 9.140427998647408e-05,
"loss": 0.4902,
"step": 73000
},
{
"epoch": 10.55,
"learning_rate": 8.995507463407564e-05,
"loss": 0.4932,
"step": 73500
},
{
"epoch": 10.62,
"learning_rate": 8.85058692816772e-05,
"loss": 0.4932,
"step": 74000
},
{
"epoch": 10.69,
"learning_rate": 8.705666392927877e-05,
"loss": 0.4905,
"step": 74500
},
{
"epoch": 10.77,
"learning_rate": 8.560745857688034e-05,
"loss": 0.4927,
"step": 75000
},
{
"epoch": 10.84,
"learning_rate": 8.41582532244819e-05,
"loss": 0.4782,
"step": 75500
},
{
"epoch": 10.91,
"learning_rate": 8.270904787208346e-05,
"loss": 0.483,
"step": 76000
},
{
"epoch": 10.98,
"learning_rate": 8.126274093038982e-05,
"loss": 0.4826,
"step": 76500
},
{
"epoch": 11.05,
"learning_rate": 7.981353557799138e-05,
"loss": 0.4733,
"step": 77000
},
{
"epoch": 11.12,
"learning_rate": 7.836433022559295e-05,
"loss": 0.4609,
"step": 77500
},
{
"epoch": 11.2,
"learning_rate": 7.691512487319451e-05,
"loss": 0.4652,
"step": 78000
},
{
"epoch": 11.27,
"learning_rate": 7.54659195207961e-05,
"loss": 0.4594,
"step": 78500
},
{
"epoch": 11.34,
"learning_rate": 7.401671416839766e-05,
"loss": 0.4688,
"step": 79000
},
{
"epoch": 11.41,
"learning_rate": 7.257040722670402e-05,
"loss": 0.4633,
"step": 79500
},
{
"epoch": 11.48,
"learning_rate": 7.112120187430559e-05,
"loss": 0.4654,
"step": 80000
},
{
"epoch": 11.55,
"learning_rate": 6.967199652190715e-05,
"loss": 0.4637,
"step": 80500
},
{
"epoch": 11.63,
"learning_rate": 6.822568958021352e-05,
"loss": 0.4599,
"step": 81000
},
{
"epoch": 11.7,
"learning_rate": 6.677648422781507e-05,
"loss": 0.4608,
"step": 81500
},
{
"epoch": 11.77,
"learning_rate": 6.532727887541664e-05,
"loss": 0.4536,
"step": 82000
},
{
"epoch": 11.84,
"learning_rate": 6.38780735230182e-05,
"loss": 0.4576,
"step": 82500
},
{
"epoch": 11.91,
"learning_rate": 6.242886817061976e-05,
"loss": 0.4555,
"step": 83000
},
{
"epoch": 11.99,
"learning_rate": 6.0979662818221336e-05,
"loss": 0.4545,
"step": 83500
},
{
"epoch": 12.06,
"learning_rate": 5.95304574658229e-05,
"loss": 0.4332,
"step": 84000
},
{
"epoch": 12.13,
"learning_rate": 5.808125211342447e-05,
"loss": 0.4359,
"step": 84500
},
{
"epoch": 12.2,
"learning_rate": 5.663204676102604e-05,
"loss": 0.4358,
"step": 85000
},
{
"epoch": 12.27,
"learning_rate": 5.51828414086276e-05,
"loss": 0.4365,
"step": 85500
},
{
"epoch": 12.34,
"learning_rate": 5.373363605622916e-05,
"loss": 0.4411,
"step": 86000
},
{
"epoch": 12.42,
"learning_rate": 5.228732911453553e-05,
"loss": 0.4354,
"step": 86500
},
{
"epoch": 12.49,
"learning_rate": 5.083812376213709e-05,
"loss": 0.4366,
"step": 87000
},
{
"epoch": 12.56,
"learning_rate": 4.9388918409738656e-05,
"loss": 0.4313,
"step": 87500
},
{
"epoch": 12.63,
"learning_rate": 4.793971305734022e-05,
"loss": 0.4342,
"step": 88000
},
{
"epoch": 12.7,
"learning_rate": 4.6490507704941785e-05,
"loss": 0.4328,
"step": 88500
},
{
"epoch": 12.77,
"learning_rate": 4.504130235254335e-05,
"loss": 0.4327,
"step": 89000
},
{
"epoch": 12.85,
"learning_rate": 4.359499541084972e-05,
"loss": 0.4329,
"step": 89500
},
{
"epoch": 12.92,
"learning_rate": 4.2145790058451275e-05,
"loss": 0.4343,
"step": 90000
},
{
"epoch": 12.99,
"learning_rate": 4.069658470605284e-05,
"loss": 0.4249,
"step": 90500
},
{
"epoch": 13.06,
"learning_rate": 3.9247379353654405e-05,
"loss": 0.4166,
"step": 91000
},
{
"epoch": 13.13,
"learning_rate": 3.779817400125597e-05,
"loss": 0.4145,
"step": 91500
},
{
"epoch": 13.21,
"learning_rate": 3.635186705956234e-05,
"loss": 0.4156,
"step": 92000
},
{
"epoch": 13.28,
"learning_rate": 3.49026617071639e-05,
"loss": 0.4132,
"step": 92500
},
{
"epoch": 13.35,
"learning_rate": 3.345635476547026e-05,
"loss": 0.4062,
"step": 93000
},
{
"epoch": 13.42,
"learning_rate": 3.2007149413071834e-05,
"loss": 0.4141,
"step": 93500
},
{
"epoch": 13.49,
"learning_rate": 3.05579440606734e-05,
"loss": 0.4101,
"step": 94000
},
{
"epoch": 13.56,
"learning_rate": 2.910873870827496e-05,
"loss": 0.4153,
"step": 94500
},
{
"epoch": 13.64,
"learning_rate": 2.7659533355876528e-05,
"loss": 0.4096,
"step": 95000
},
{
"epoch": 13.71,
"learning_rate": 2.6210328003478092e-05,
"loss": 0.4118,
"step": 95500
},
{
"epoch": 13.78,
"learning_rate": 2.4761122651079657e-05,
"loss": 0.4113,
"step": 96000
},
{
"epoch": 13.85,
"learning_rate": 2.3314815709386018e-05,
"loss": 0.4117,
"step": 96500
},
{
"epoch": 13.92,
"learning_rate": 2.1865610356987582e-05,
"loss": 0.408,
"step": 97000
},
{
"epoch": 13.99,
"learning_rate": 2.041640500458915e-05,
"loss": 0.4088,
"step": 97500
}
],
"logging_steps": 500,
"max_steps": 104505,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 3.788962021131011e+20,
"trial_name": null,
"trial_params": null
}