m2m100_418M_ibo_en_rel / trainer_state.json
Davlan's picture
add MT model
8a19c96
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 123732,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.979795040894837e-05,
"loss": 2.8372,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.9595900817896746e-05,
"loss": 2.4789,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 4.9393851226845114e-05,
"loss": 2.2893,
"step": 1500
},
{
"epoch": 0.05,
"learning_rate": 4.9191801635793496e-05,
"loss": 2.1965,
"step": 2000
},
{
"epoch": 0.06,
"learning_rate": 4.8989752044741864e-05,
"loss": 2.1373,
"step": 2500
},
{
"epoch": 0.07,
"learning_rate": 4.878770245369024e-05,
"loss": 2.0767,
"step": 3000
},
{
"epoch": 0.08,
"learning_rate": 4.858565286263861e-05,
"loss": 2.0443,
"step": 3500
},
{
"epoch": 0.1,
"learning_rate": 4.838360327158698e-05,
"loss": 2.024,
"step": 4000
},
{
"epoch": 0.11,
"learning_rate": 4.818155368053535e-05,
"loss": 1.9501,
"step": 4500
},
{
"epoch": 0.12,
"learning_rate": 4.7979504089483726e-05,
"loss": 1.9562,
"step": 5000
},
{
"epoch": 0.13,
"learning_rate": 4.7777454498432095e-05,
"loss": 1.9222,
"step": 5500
},
{
"epoch": 0.15,
"learning_rate": 4.757540490738047e-05,
"loss": 1.8926,
"step": 6000
},
{
"epoch": 0.16,
"learning_rate": 4.7373355316328845e-05,
"loss": 1.8646,
"step": 6500
},
{
"epoch": 0.17,
"learning_rate": 4.717130572527721e-05,
"loss": 1.8655,
"step": 7000
},
{
"epoch": 0.18,
"learning_rate": 4.696925613422559e-05,
"loss": 1.8596,
"step": 7500
},
{
"epoch": 0.19,
"learning_rate": 4.676720654317396e-05,
"loss": 1.8408,
"step": 8000
},
{
"epoch": 0.21,
"learning_rate": 4.656515695212233e-05,
"loss": 1.8267,
"step": 8500
},
{
"epoch": 0.22,
"learning_rate": 4.63631073610707e-05,
"loss": 1.7901,
"step": 9000
},
{
"epoch": 0.23,
"learning_rate": 4.6161057770019075e-05,
"loss": 1.7815,
"step": 9500
},
{
"epoch": 0.24,
"learning_rate": 4.595900817896745e-05,
"loss": 1.785,
"step": 10000
},
{
"epoch": 0.25,
"learning_rate": 4.575695858791582e-05,
"loss": 1.7634,
"step": 10500
},
{
"epoch": 0.27,
"learning_rate": 4.5554908996864194e-05,
"loss": 1.7426,
"step": 11000
},
{
"epoch": 0.28,
"learning_rate": 4.535285940581256e-05,
"loss": 1.7472,
"step": 11500
},
{
"epoch": 0.29,
"learning_rate": 4.515080981476094e-05,
"loss": 1.7534,
"step": 12000
},
{
"epoch": 0.3,
"learning_rate": 4.4948760223709305e-05,
"loss": 1.7196,
"step": 12500
},
{
"epoch": 0.32,
"learning_rate": 4.474671063265768e-05,
"loss": 1.7174,
"step": 13000
},
{
"epoch": 0.33,
"learning_rate": 4.4544661041606056e-05,
"loss": 1.7015,
"step": 13500
},
{
"epoch": 0.34,
"learning_rate": 4.434261145055443e-05,
"loss": 1.6568,
"step": 14000
},
{
"epoch": 0.35,
"learning_rate": 4.41405618595028e-05,
"loss": 1.6854,
"step": 14500
},
{
"epoch": 0.36,
"learning_rate": 4.393851226845117e-05,
"loss": 1.7124,
"step": 15000
},
{
"epoch": 0.38,
"learning_rate": 4.373646267739954e-05,
"loss": 1.6685,
"step": 15500
},
{
"epoch": 0.39,
"learning_rate": 4.353441308634791e-05,
"loss": 1.6657,
"step": 16000
},
{
"epoch": 0.4,
"learning_rate": 4.3332363495296286e-05,
"loss": 1.6733,
"step": 16500
},
{
"epoch": 0.41,
"learning_rate": 4.313031390424466e-05,
"loss": 1.6467,
"step": 17000
},
{
"epoch": 0.42,
"learning_rate": 4.2928264313193036e-05,
"loss": 1.6647,
"step": 17500
},
{
"epoch": 0.44,
"learning_rate": 4.2726214722141404e-05,
"loss": 1.6391,
"step": 18000
},
{
"epoch": 0.45,
"learning_rate": 4.252416513108978e-05,
"loss": 1.6388,
"step": 18500
},
{
"epoch": 0.46,
"learning_rate": 4.232211554003815e-05,
"loss": 1.6363,
"step": 19000
},
{
"epoch": 0.47,
"learning_rate": 4.212006594898652e-05,
"loss": 1.6375,
"step": 19500
},
{
"epoch": 0.48,
"learning_rate": 4.191801635793489e-05,
"loss": 1.6179,
"step": 20000
},
{
"epoch": 0.5,
"learning_rate": 4.1715966766883266e-05,
"loss": 1.6001,
"step": 20500
},
{
"epoch": 0.51,
"learning_rate": 4.151391717583164e-05,
"loss": 1.6116,
"step": 21000
},
{
"epoch": 0.52,
"learning_rate": 4.131186758478001e-05,
"loss": 1.592,
"step": 21500
},
{
"epoch": 0.53,
"learning_rate": 4.1109817993728385e-05,
"loss": 1.5869,
"step": 22000
},
{
"epoch": 0.55,
"learning_rate": 4.090776840267675e-05,
"loss": 1.5869,
"step": 22500
},
{
"epoch": 0.56,
"learning_rate": 4.070571881162513e-05,
"loss": 1.6034,
"step": 23000
},
{
"epoch": 0.57,
"learning_rate": 4.05036692205735e-05,
"loss": 1.5646,
"step": 23500
},
{
"epoch": 0.58,
"learning_rate": 4.030161962952187e-05,
"loss": 1.5619,
"step": 24000
},
{
"epoch": 0.59,
"learning_rate": 4.009957003847025e-05,
"loss": 1.5718,
"step": 24500
},
{
"epoch": 0.61,
"learning_rate": 3.9897520447418615e-05,
"loss": 1.5903,
"step": 25000
},
{
"epoch": 0.62,
"learning_rate": 3.969547085636699e-05,
"loss": 1.5563,
"step": 25500
},
{
"epoch": 0.63,
"learning_rate": 3.949342126531536e-05,
"loss": 1.5549,
"step": 26000
},
{
"epoch": 0.64,
"learning_rate": 3.9291371674263734e-05,
"loss": 1.5373,
"step": 26500
},
{
"epoch": 0.65,
"learning_rate": 3.90893220832121e-05,
"loss": 1.5607,
"step": 27000
},
{
"epoch": 0.67,
"learning_rate": 3.888727249216048e-05,
"loss": 1.5519,
"step": 27500
},
{
"epoch": 0.68,
"learning_rate": 3.8685222901108846e-05,
"loss": 1.5209,
"step": 28000
},
{
"epoch": 0.69,
"learning_rate": 3.848317331005723e-05,
"loss": 1.5354,
"step": 28500
},
{
"epoch": 0.7,
"learning_rate": 3.8281123719005596e-05,
"loss": 1.5082,
"step": 29000
},
{
"epoch": 0.72,
"learning_rate": 3.807907412795397e-05,
"loss": 1.506,
"step": 29500
},
{
"epoch": 0.73,
"learning_rate": 3.787702453690234e-05,
"loss": 1.5307,
"step": 30000
},
{
"epoch": 0.74,
"learning_rate": 3.767497494585071e-05,
"loss": 1.501,
"step": 30500
},
{
"epoch": 0.75,
"learning_rate": 3.747292535479908e-05,
"loss": 1.5334,
"step": 31000
},
{
"epoch": 0.76,
"learning_rate": 3.727087576374745e-05,
"loss": 1.513,
"step": 31500
},
{
"epoch": 0.78,
"learning_rate": 3.706882617269583e-05,
"loss": 1.5083,
"step": 32000
},
{
"epoch": 0.79,
"learning_rate": 3.68667765816442e-05,
"loss": 1.4999,
"step": 32500
},
{
"epoch": 0.8,
"learning_rate": 3.6664726990592576e-05,
"loss": 1.4755,
"step": 33000
},
{
"epoch": 0.81,
"learning_rate": 3.6462677399540945e-05,
"loss": 1.5159,
"step": 33500
},
{
"epoch": 0.82,
"learning_rate": 3.626062780848932e-05,
"loss": 1.5126,
"step": 34000
},
{
"epoch": 0.84,
"learning_rate": 3.605857821743769e-05,
"loss": 1.4948,
"step": 34500
},
{
"epoch": 0.85,
"learning_rate": 3.585652862638606e-05,
"loss": 1.4975,
"step": 35000
},
{
"epoch": 0.86,
"learning_rate": 3.565447903533443e-05,
"loss": 1.5006,
"step": 35500
},
{
"epoch": 0.87,
"learning_rate": 3.5452429444282807e-05,
"loss": 1.4772,
"step": 36000
},
{
"epoch": 0.88,
"learning_rate": 3.525037985323118e-05,
"loss": 1.4721,
"step": 36500
},
{
"epoch": 0.9,
"learning_rate": 3.504833026217955e-05,
"loss": 1.4934,
"step": 37000
},
{
"epoch": 0.91,
"learning_rate": 3.4846280671127925e-05,
"loss": 1.47,
"step": 37500
},
{
"epoch": 0.92,
"learning_rate": 3.4644231080076293e-05,
"loss": 1.471,
"step": 38000
},
{
"epoch": 0.93,
"learning_rate": 3.444218148902467e-05,
"loss": 1.4491,
"step": 38500
},
{
"epoch": 0.95,
"learning_rate": 3.424013189797304e-05,
"loss": 1.4718,
"step": 39000
},
{
"epoch": 0.96,
"learning_rate": 3.403808230692141e-05,
"loss": 1.4481,
"step": 39500
},
{
"epoch": 0.97,
"learning_rate": 3.383603271586979e-05,
"loss": 1.4609,
"step": 40000
},
{
"epoch": 0.98,
"learning_rate": 3.3633983124818155e-05,
"loss": 1.4501,
"step": 40500
},
{
"epoch": 0.99,
"learning_rate": 3.343193353376653e-05,
"loss": 1.4285,
"step": 41000
},
{
"epoch": 1.01,
"learning_rate": 3.32298839427149e-05,
"loss": 1.349,
"step": 41500
},
{
"epoch": 1.02,
"learning_rate": 3.3027834351663274e-05,
"loss": 1.3115,
"step": 42000
},
{
"epoch": 1.03,
"learning_rate": 3.282578476061164e-05,
"loss": 1.3064,
"step": 42500
},
{
"epoch": 1.04,
"learning_rate": 3.262373516956002e-05,
"loss": 1.2838,
"step": 43000
},
{
"epoch": 1.05,
"learning_rate": 3.242168557850839e-05,
"loss": 1.2855,
"step": 43500
},
{
"epoch": 1.07,
"learning_rate": 3.221963598745677e-05,
"loss": 1.2931,
"step": 44000
},
{
"epoch": 1.08,
"learning_rate": 3.2017586396405136e-05,
"loss": 1.2874,
"step": 44500
},
{
"epoch": 1.09,
"learning_rate": 3.1815536805353504e-05,
"loss": 1.2677,
"step": 45000
},
{
"epoch": 1.1,
"learning_rate": 3.161348721430188e-05,
"loss": 1.2898,
"step": 45500
},
{
"epoch": 1.12,
"learning_rate": 3.141143762325025e-05,
"loss": 1.2906,
"step": 46000
},
{
"epoch": 1.13,
"learning_rate": 3.120938803219862e-05,
"loss": 1.293,
"step": 46500
},
{
"epoch": 1.14,
"learning_rate": 3.1007338441147e-05,
"loss": 1.2844,
"step": 47000
},
{
"epoch": 1.15,
"learning_rate": 3.080528885009537e-05,
"loss": 1.295,
"step": 47500
},
{
"epoch": 1.16,
"learning_rate": 3.060323925904374e-05,
"loss": 1.3018,
"step": 48000
},
{
"epoch": 1.18,
"learning_rate": 3.0401189667992113e-05,
"loss": 1.29,
"step": 48500
},
{
"epoch": 1.19,
"learning_rate": 3.0199140076940485e-05,
"loss": 1.2879,
"step": 49000
},
{
"epoch": 1.2,
"learning_rate": 2.9997090485888856e-05,
"loss": 1.3044,
"step": 49500
},
{
"epoch": 1.21,
"learning_rate": 2.9795040894837228e-05,
"loss": 1.3007,
"step": 50000
},
{
"epoch": 1.22,
"learning_rate": 2.95929913037856e-05,
"loss": 1.2731,
"step": 50500
},
{
"epoch": 1.24,
"learning_rate": 2.939094171273398e-05,
"loss": 1.2825,
"step": 51000
},
{
"epoch": 1.25,
"learning_rate": 2.918889212168235e-05,
"loss": 1.2862,
"step": 51500
},
{
"epoch": 1.26,
"learning_rate": 2.898684253063072e-05,
"loss": 1.2784,
"step": 52000
},
{
"epoch": 1.27,
"learning_rate": 2.878479293957909e-05,
"loss": 1.2938,
"step": 52500
},
{
"epoch": 1.29,
"learning_rate": 2.8582743348527462e-05,
"loss": 1.2863,
"step": 53000
},
{
"epoch": 1.3,
"learning_rate": 2.8380693757475834e-05,
"loss": 1.2642,
"step": 53500
},
{
"epoch": 1.31,
"learning_rate": 2.8178644166424205e-05,
"loss": 1.2798,
"step": 54000
},
{
"epoch": 1.32,
"learning_rate": 2.7976594575372584e-05,
"loss": 1.2822,
"step": 54500
},
{
"epoch": 1.33,
"learning_rate": 2.7774544984320956e-05,
"loss": 1.2814,
"step": 55000
},
{
"epoch": 1.35,
"learning_rate": 2.7572495393269327e-05,
"loss": 1.2556,
"step": 55500
},
{
"epoch": 1.36,
"learning_rate": 2.73704458022177e-05,
"loss": 1.2496,
"step": 56000
},
{
"epoch": 1.37,
"learning_rate": 2.716839621116607e-05,
"loss": 1.2556,
"step": 56500
},
{
"epoch": 1.38,
"learning_rate": 2.6966346620114442e-05,
"loss": 1.267,
"step": 57000
},
{
"epoch": 1.39,
"learning_rate": 2.6764297029062814e-05,
"loss": 1.2647,
"step": 57500
},
{
"epoch": 1.41,
"learning_rate": 2.6562247438011182e-05,
"loss": 1.2617,
"step": 58000
},
{
"epoch": 1.42,
"learning_rate": 2.636019784695956e-05,
"loss": 1.2625,
"step": 58500
},
{
"epoch": 1.43,
"learning_rate": 2.6158148255907933e-05,
"loss": 1.267,
"step": 59000
},
{
"epoch": 1.44,
"learning_rate": 2.5956098664856304e-05,
"loss": 1.2708,
"step": 59500
},
{
"epoch": 1.45,
"learning_rate": 2.5754049073804676e-05,
"loss": 1.2623,
"step": 60000
},
{
"epoch": 1.47,
"learning_rate": 2.5551999482753048e-05,
"loss": 1.2569,
"step": 60500
},
{
"epoch": 1.48,
"learning_rate": 2.534994989170142e-05,
"loss": 1.2438,
"step": 61000
},
{
"epoch": 1.49,
"learning_rate": 2.514790030064979e-05,
"loss": 1.2695,
"step": 61500
},
{
"epoch": 1.5,
"learning_rate": 2.4945850709598166e-05,
"loss": 1.2551,
"step": 62000
},
{
"epoch": 1.52,
"learning_rate": 2.4743801118546535e-05,
"loss": 1.2633,
"step": 62500
},
{
"epoch": 1.53,
"learning_rate": 2.454175152749491e-05,
"loss": 1.2424,
"step": 63000
},
{
"epoch": 1.54,
"learning_rate": 2.433970193644328e-05,
"loss": 1.2659,
"step": 63500
},
{
"epoch": 1.55,
"learning_rate": 2.4137652345391653e-05,
"loss": 1.2639,
"step": 64000
},
{
"epoch": 1.56,
"learning_rate": 2.3935602754340025e-05,
"loss": 1.2428,
"step": 64500
},
{
"epoch": 1.58,
"learning_rate": 2.37335531632884e-05,
"loss": 1.2475,
"step": 65000
},
{
"epoch": 1.59,
"learning_rate": 2.3531503572236772e-05,
"loss": 1.2645,
"step": 65500
},
{
"epoch": 1.6,
"learning_rate": 2.3329453981185143e-05,
"loss": 1.2685,
"step": 66000
},
{
"epoch": 1.61,
"learning_rate": 2.3127404390133515e-05,
"loss": 1.2612,
"step": 66500
},
{
"epoch": 1.62,
"learning_rate": 2.2925354799081887e-05,
"loss": 1.2448,
"step": 67000
},
{
"epoch": 1.64,
"learning_rate": 2.272330520803026e-05,
"loss": 1.2503,
"step": 67500
},
{
"epoch": 1.65,
"learning_rate": 2.252125561697863e-05,
"loss": 1.2255,
"step": 68000
},
{
"epoch": 1.66,
"learning_rate": 2.2319206025927005e-05,
"loss": 1.2437,
"step": 68500
},
{
"epoch": 1.67,
"learning_rate": 2.2117156434875377e-05,
"loss": 1.2258,
"step": 69000
},
{
"epoch": 1.69,
"learning_rate": 2.191510684382375e-05,
"loss": 1.2203,
"step": 69500
},
{
"epoch": 1.7,
"learning_rate": 2.171305725277212e-05,
"loss": 1.2359,
"step": 70000
},
{
"epoch": 1.71,
"learning_rate": 2.1511007661720496e-05,
"loss": 1.2359,
"step": 70500
},
{
"epoch": 1.72,
"learning_rate": 2.1308958070668867e-05,
"loss": 1.239,
"step": 71000
},
{
"epoch": 1.73,
"learning_rate": 2.110690847961724e-05,
"loss": 1.21,
"step": 71500
},
{
"epoch": 1.75,
"learning_rate": 2.090485888856561e-05,
"loss": 1.1985,
"step": 72000
},
{
"epoch": 1.76,
"learning_rate": 2.0702809297513983e-05,
"loss": 1.2627,
"step": 72500
},
{
"epoch": 1.77,
"learning_rate": 2.0500759706462354e-05,
"loss": 1.2186,
"step": 73000
},
{
"epoch": 1.78,
"learning_rate": 2.0298710115410726e-05,
"loss": 1.2453,
"step": 73500
},
{
"epoch": 1.79,
"learning_rate": 2.0096660524359098e-05,
"loss": 1.227,
"step": 74000
},
{
"epoch": 1.81,
"learning_rate": 1.9894610933307473e-05,
"loss": 1.2383,
"step": 74500
},
{
"epoch": 1.82,
"learning_rate": 1.9692561342255844e-05,
"loss": 1.2272,
"step": 75000
},
{
"epoch": 1.83,
"learning_rate": 1.9490511751204216e-05,
"loss": 1.2031,
"step": 75500
},
{
"epoch": 1.84,
"learning_rate": 1.928846216015259e-05,
"loss": 1.2024,
"step": 76000
},
{
"epoch": 1.85,
"learning_rate": 1.9086412569100963e-05,
"loss": 1.2257,
"step": 76500
},
{
"epoch": 1.87,
"learning_rate": 1.8884362978049335e-05,
"loss": 1.2075,
"step": 77000
},
{
"epoch": 1.88,
"learning_rate": 1.8682313386997703e-05,
"loss": 1.2449,
"step": 77500
},
{
"epoch": 1.89,
"learning_rate": 1.8480263795946078e-05,
"loss": 1.2141,
"step": 78000
},
{
"epoch": 1.9,
"learning_rate": 1.827821420489445e-05,
"loss": 1.2161,
"step": 78500
},
{
"epoch": 1.92,
"learning_rate": 1.807616461384282e-05,
"loss": 1.2141,
"step": 79000
},
{
"epoch": 1.93,
"learning_rate": 1.7874115022791193e-05,
"loss": 1.1951,
"step": 79500
},
{
"epoch": 1.94,
"learning_rate": 1.767206543173957e-05,
"loss": 1.2105,
"step": 80000
},
{
"epoch": 1.95,
"learning_rate": 1.747001584068794e-05,
"loss": 1.187,
"step": 80500
},
{
"epoch": 1.96,
"learning_rate": 1.7267966249636312e-05,
"loss": 1.2099,
"step": 81000
},
{
"epoch": 1.98,
"learning_rate": 1.7065916658584684e-05,
"loss": 1.2091,
"step": 81500
},
{
"epoch": 1.99,
"learning_rate": 1.6863867067533055e-05,
"loss": 1.2034,
"step": 82000
},
{
"epoch": 2.0,
"learning_rate": 1.6661817476481427e-05,
"loss": 1.1969,
"step": 82500
},
{
"epoch": 2.01,
"learning_rate": 1.64597678854298e-05,
"loss": 1.0541,
"step": 83000
},
{
"epoch": 2.02,
"learning_rate": 1.6257718294378174e-05,
"loss": 1.0428,
"step": 83500
},
{
"epoch": 2.04,
"learning_rate": 1.6055668703326546e-05,
"loss": 1.0576,
"step": 84000
},
{
"epoch": 2.05,
"learning_rate": 1.5853619112274917e-05,
"loss": 1.0312,
"step": 84500
},
{
"epoch": 2.06,
"learning_rate": 1.565156952122329e-05,
"loss": 1.0415,
"step": 85000
},
{
"epoch": 2.07,
"learning_rate": 1.5449519930171664e-05,
"loss": 1.0685,
"step": 85500
},
{
"epoch": 2.09,
"learning_rate": 1.5247470339120034e-05,
"loss": 1.0584,
"step": 86000
},
{
"epoch": 2.1,
"learning_rate": 1.5045420748068406e-05,
"loss": 1.0444,
"step": 86500
},
{
"epoch": 2.11,
"learning_rate": 1.4843371157016778e-05,
"loss": 1.0456,
"step": 87000
},
{
"epoch": 2.12,
"learning_rate": 1.4641321565965153e-05,
"loss": 1.0597,
"step": 87500
},
{
"epoch": 2.13,
"learning_rate": 1.4439271974913524e-05,
"loss": 1.0505,
"step": 88000
},
{
"epoch": 2.15,
"learning_rate": 1.4237222383861894e-05,
"loss": 1.0605,
"step": 88500
},
{
"epoch": 2.16,
"learning_rate": 1.4035172792810266e-05,
"loss": 1.0581,
"step": 89000
},
{
"epoch": 2.17,
"learning_rate": 1.3833123201758641e-05,
"loss": 1.0357,
"step": 89500
},
{
"epoch": 2.18,
"learning_rate": 1.3631073610707013e-05,
"loss": 1.0408,
"step": 90000
},
{
"epoch": 2.19,
"learning_rate": 1.3429024019655385e-05,
"loss": 1.0404,
"step": 90500
},
{
"epoch": 2.21,
"learning_rate": 1.3226974428603758e-05,
"loss": 1.039,
"step": 91000
},
{
"epoch": 2.22,
"learning_rate": 1.302492483755213e-05,
"loss": 1.0434,
"step": 91500
},
{
"epoch": 2.23,
"learning_rate": 1.2822875246500501e-05,
"loss": 1.0348,
"step": 92000
},
{
"epoch": 2.24,
"learning_rate": 1.2620825655448873e-05,
"loss": 1.0526,
"step": 92500
},
{
"epoch": 2.25,
"learning_rate": 1.2418776064397247e-05,
"loss": 1.0395,
"step": 93000
},
{
"epoch": 2.27,
"learning_rate": 1.2216726473345618e-05,
"loss": 1.0409,
"step": 93500
},
{
"epoch": 2.28,
"learning_rate": 1.201467688229399e-05,
"loss": 1.0396,
"step": 94000
},
{
"epoch": 2.29,
"learning_rate": 1.1812627291242363e-05,
"loss": 1.055,
"step": 94500
},
{
"epoch": 2.3,
"learning_rate": 1.1610577700190735e-05,
"loss": 1.0594,
"step": 95000
},
{
"epoch": 2.32,
"learning_rate": 1.1408528109139109e-05,
"loss": 1.04,
"step": 95500
},
{
"epoch": 2.33,
"learning_rate": 1.1206478518087479e-05,
"loss": 1.0421,
"step": 96000
},
{
"epoch": 2.34,
"learning_rate": 1.1004428927035852e-05,
"loss": 1.0507,
"step": 96500
},
{
"epoch": 2.35,
"learning_rate": 1.0802379335984225e-05,
"loss": 1.0275,
"step": 97000
},
{
"epoch": 2.36,
"learning_rate": 1.0600329744932597e-05,
"loss": 1.0356,
"step": 97500
},
{
"epoch": 2.38,
"learning_rate": 1.0398280153880969e-05,
"loss": 1.0379,
"step": 98000
},
{
"epoch": 2.39,
"learning_rate": 1.019623056282934e-05,
"loss": 1.0256,
"step": 98500
},
{
"epoch": 2.4,
"learning_rate": 9.994180971777714e-06,
"loss": 1.0462,
"step": 99000
},
{
"epoch": 2.41,
"learning_rate": 9.792131380726086e-06,
"loss": 1.0387,
"step": 99500
},
{
"epoch": 2.42,
"learning_rate": 9.590081789674459e-06,
"loss": 1.0175,
"step": 100000
},
{
"epoch": 2.44,
"learning_rate": 9.38803219862283e-06,
"loss": 1.0262,
"step": 100500
},
{
"epoch": 2.45,
"learning_rate": 9.185982607571203e-06,
"loss": 1.0545,
"step": 101000
},
{
"epoch": 2.46,
"learning_rate": 8.983933016519574e-06,
"loss": 1.0339,
"step": 101500
},
{
"epoch": 2.47,
"learning_rate": 8.781883425467948e-06,
"loss": 1.0378,
"step": 102000
},
{
"epoch": 2.49,
"learning_rate": 8.57983383441632e-06,
"loss": 1.034,
"step": 102500
},
{
"epoch": 2.5,
"learning_rate": 8.377784243364691e-06,
"loss": 1.0412,
"step": 103000
},
{
"epoch": 2.51,
"learning_rate": 8.175734652313063e-06,
"loss": 1.0321,
"step": 103500
},
{
"epoch": 2.52,
"learning_rate": 7.973685061261436e-06,
"loss": 1.0341,
"step": 104000
},
{
"epoch": 2.53,
"learning_rate": 7.77163547020981e-06,
"loss": 1.0207,
"step": 104500
},
{
"epoch": 2.55,
"learning_rate": 7.5695858791581805e-06,
"loss": 1.0017,
"step": 105000
},
{
"epoch": 2.56,
"learning_rate": 7.367536288106554e-06,
"loss": 1.0238,
"step": 105500
},
{
"epoch": 2.57,
"learning_rate": 7.165486697054926e-06,
"loss": 1.0219,
"step": 106000
},
{
"epoch": 2.58,
"learning_rate": 6.963437106003298e-06,
"loss": 1.0153,
"step": 106500
},
{
"epoch": 2.59,
"learning_rate": 6.76138751495167e-06,
"loss": 1.0296,
"step": 107000
},
{
"epoch": 2.61,
"learning_rate": 6.5593379239000424e-06,
"loss": 1.0198,
"step": 107500
},
{
"epoch": 2.62,
"learning_rate": 6.357288332848414e-06,
"loss": 1.0137,
"step": 108000
},
{
"epoch": 2.63,
"learning_rate": 6.1552387417967876e-06,
"loss": 1.0256,
"step": 108500
},
{
"epoch": 2.64,
"learning_rate": 5.953189150745159e-06,
"loss": 1.0119,
"step": 109000
},
{
"epoch": 2.65,
"learning_rate": 5.751139559693532e-06,
"loss": 1.0177,
"step": 109500
},
{
"epoch": 2.67,
"learning_rate": 5.5490899686419036e-06,
"loss": 1.0131,
"step": 110000
},
{
"epoch": 2.68,
"learning_rate": 5.347040377590276e-06,
"loss": 0.9982,
"step": 110500
},
{
"epoch": 2.69,
"learning_rate": 5.144990786538649e-06,
"loss": 1.0195,
"step": 111000
},
{
"epoch": 2.7,
"learning_rate": 4.94294119548702e-06,
"loss": 1.0126,
"step": 111500
},
{
"epoch": 2.72,
"learning_rate": 4.740891604435393e-06,
"loss": 1.0218,
"step": 112000
},
{
"epoch": 2.73,
"learning_rate": 4.538842013383765e-06,
"loss": 1.0432,
"step": 112500
},
{
"epoch": 2.74,
"learning_rate": 4.336792422332137e-06,
"loss": 1.0058,
"step": 113000
},
{
"epoch": 2.75,
"learning_rate": 4.13474283128051e-06,
"loss": 1.0041,
"step": 113500
},
{
"epoch": 2.76,
"learning_rate": 3.9326932402288815e-06,
"loss": 1.0208,
"step": 114000
},
{
"epoch": 2.78,
"learning_rate": 3.730643649177254e-06,
"loss": 0.9971,
"step": 114500
},
{
"epoch": 2.79,
"learning_rate": 3.5285940581256262e-06,
"loss": 1.0126,
"step": 115000
},
{
"epoch": 2.8,
"learning_rate": 3.3265444670739992e-06,
"loss": 1.0259,
"step": 115500
},
{
"epoch": 2.81,
"learning_rate": 3.124494876022371e-06,
"loss": 1.0374,
"step": 116000
},
{
"epoch": 2.82,
"learning_rate": 2.922445284970743e-06,
"loss": 1.025,
"step": 116500
},
{
"epoch": 2.84,
"learning_rate": 2.7203956939191156e-06,
"loss": 1.0199,
"step": 117000
},
{
"epoch": 2.85,
"learning_rate": 2.518346102867488e-06,
"loss": 1.0116,
"step": 117500
},
{
"epoch": 2.86,
"learning_rate": 2.3162965118158603e-06,
"loss": 1.0321,
"step": 118000
},
{
"epoch": 2.87,
"learning_rate": 2.1142469207642325e-06,
"loss": 1.0162,
"step": 118500
},
{
"epoch": 2.89,
"learning_rate": 1.9121973297126046e-06,
"loss": 1.0218,
"step": 119000
},
{
"epoch": 2.9,
"learning_rate": 1.710147738660977e-06,
"loss": 0.9819,
"step": 119500
},
{
"epoch": 2.91,
"learning_rate": 1.5080981476093493e-06,
"loss": 1.0366,
"step": 120000
},
{
"epoch": 2.92,
"learning_rate": 1.3060485565577217e-06,
"loss": 0.9988,
"step": 120500
},
{
"epoch": 2.93,
"learning_rate": 1.1039989655060938e-06,
"loss": 1.0048,
"step": 121000
},
{
"epoch": 2.95,
"learning_rate": 9.019493744544661e-07,
"loss": 1.0015,
"step": 121500
},
{
"epoch": 2.96,
"learning_rate": 6.998997834028384e-07,
"loss": 0.9998,
"step": 122000
},
{
"epoch": 2.97,
"learning_rate": 4.978501923512107e-07,
"loss": 1.0045,
"step": 122500
},
{
"epoch": 2.98,
"learning_rate": 2.95800601299583e-07,
"loss": 1.0024,
"step": 123000
},
{
"epoch": 2.99,
"learning_rate": 9.375101024795526e-08,
"loss": 0.995,
"step": 123500
},
{
"epoch": 3.0,
"step": 123732,
"total_flos": 1.8552133900202803e+17,
"train_loss": 1.3215980781360575,
"train_runtime": 31676.1684,
"train_samples_per_second": 39.061,
"train_steps_per_second": 3.906
}
],
"max_steps": 123732,
"num_train_epochs": 3,
"total_flos": 1.8552133900202803e+17,
"trial_name": null,
"trial_params": null
}