BPE-HF-CC100-FR / trainer_state.json
qanastek's picture
Upload 41 files
1764a0d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.0,
"global_step": 101034,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-09,
"loss": 10.5048,
"step": 1
},
{
"epoch": 0.13,
"learning_rate": 2.5e-06,
"loss": 9.5536,
"step": 500
},
{
"epoch": 0.27,
"learning_rate": 5e-06,
"loss": 7.9704,
"step": 1000
},
{
"epoch": 0.4,
"learning_rate": 7.5e-06,
"loss": 6.9597,
"step": 1500
},
{
"epoch": 0.53,
"learning_rate": 1e-05,
"loss": 6.6577,
"step": 2000
},
{
"epoch": 0.67,
"learning_rate": 1.25e-05,
"loss": 6.4897,
"step": 2500
},
{
"epoch": 0.8,
"learning_rate": 1.5e-05,
"loss": 6.3759,
"step": 3000
},
{
"epoch": 0.94,
"learning_rate": 1.75e-05,
"loss": 6.2799,
"step": 3500
},
{
"epoch": 1.07,
"learning_rate": 2e-05,
"loss": 6.2039,
"step": 4000
},
{
"epoch": 1.2,
"learning_rate": 2.25e-05,
"loss": 6.1376,
"step": 4500
},
{
"epoch": 1.34,
"learning_rate": 2.5e-05,
"loss": 6.0826,
"step": 5000
},
{
"epoch": 1.47,
"learning_rate": 2.7500000000000004e-05,
"loss": 6.0309,
"step": 5500
},
{
"epoch": 1.6,
"learning_rate": 3e-05,
"loss": 5.9909,
"step": 6000
},
{
"epoch": 1.74,
"learning_rate": 3.2500000000000004e-05,
"loss": 5.9553,
"step": 6500
},
{
"epoch": 1.87,
"learning_rate": 3.5e-05,
"loss": 5.919,
"step": 7000
},
{
"epoch": 2.0,
"learning_rate": 3.7500000000000003e-05,
"loss": 5.8937,
"step": 7500
},
{
"epoch": 2.14,
"learning_rate": 4e-05,
"loss": 5.8604,
"step": 8000
},
{
"epoch": 2.27,
"learning_rate": 4.2495e-05,
"loss": 5.8383,
"step": 8500
},
{
"epoch": 2.41,
"learning_rate": 4.4995000000000005e-05,
"loss": 5.8167,
"step": 9000
},
{
"epoch": 2.54,
"learning_rate": 4.7495e-05,
"loss": 5.7994,
"step": 9500
},
{
"epoch": 2.67,
"learning_rate": 4.9995000000000005e-05,
"loss": 5.7835,
"step": 10000
},
{
"epoch": 2.81,
"learning_rate": 4.998325262308313e-05,
"loss": 5.7681,
"step": 10500
},
{
"epoch": 2.94,
"learning_rate": 4.996643798762443e-05,
"loss": 5.753,
"step": 11000
},
{
"epoch": 3.07,
"learning_rate": 4.994962335216573e-05,
"loss": 5.7391,
"step": 11500
},
{
"epoch": 3.21,
"learning_rate": 4.993280871670703e-05,
"loss": 5.7271,
"step": 12000
},
{
"epoch": 3.34,
"learning_rate": 4.991602771051924e-05,
"loss": 5.7125,
"step": 12500
},
{
"epoch": 3.47,
"learning_rate": 4.9899213075060537e-05,
"loss": 5.7069,
"step": 13000
},
{
"epoch": 3.61,
"learning_rate": 4.988239843960183e-05,
"loss": 5.6974,
"step": 13500
},
{
"epoch": 3.74,
"learning_rate": 4.986558380414313e-05,
"loss": 5.687,
"step": 14000
},
{
"epoch": 3.87,
"learning_rate": 4.984880279795534e-05,
"loss": 5.6798,
"step": 14500
},
{
"epoch": 4.01,
"learning_rate": 4.983198816249664e-05,
"loss": 5.6725,
"step": 15000
},
{
"epoch": 4.14,
"learning_rate": 4.9815207156308854e-05,
"loss": 5.6667,
"step": 15500
},
{
"epoch": 4.28,
"learning_rate": 4.979839252085015e-05,
"loss": 5.6581,
"step": 16000
},
{
"epoch": 4.41,
"learning_rate": 4.9781577885391446e-05,
"loss": 5.6536,
"step": 16500
},
{
"epoch": 4.54,
"learning_rate": 4.9764763249932745e-05,
"loss": 5.6439,
"step": 17000
},
{
"epoch": 4.68,
"learning_rate": 4.974794861447404e-05,
"loss": 5.6382,
"step": 17500
},
{
"epoch": 4.81,
"learning_rate": 4.9731167608286254e-05,
"loss": 5.6337,
"step": 18000
},
{
"epoch": 4.94,
"learning_rate": 4.9714352972827547e-05,
"loss": 5.6271,
"step": 18500
},
{
"epoch": 5.08,
"learning_rate": 4.9697538337368846e-05,
"loss": 5.6235,
"step": 19000
},
{
"epoch": 5.21,
"learning_rate": 4.9680723701910145e-05,
"loss": 5.6189,
"step": 19500
},
{
"epoch": 5.34,
"learning_rate": 4.966394269572236e-05,
"loss": 5.6165,
"step": 20000
},
{
"epoch": 5.48,
"learning_rate": 4.9647128060263654e-05,
"loss": 5.6132,
"step": 20500
},
{
"epoch": 5.61,
"learning_rate": 4.9630313424804953e-05,
"loss": 5.6078,
"step": 21000
},
{
"epoch": 5.75,
"learning_rate": 4.9613498789346246e-05,
"loss": 5.6022,
"step": 21500
},
{
"epoch": 5.88,
"learning_rate": 4.9596684153887545e-05,
"loss": 5.5965,
"step": 22000
},
{
"epoch": 6.01,
"learning_rate": 4.9579903147699755e-05,
"loss": 5.5932,
"step": 22500
},
{
"epoch": 6.15,
"learning_rate": 4.9563088512241054e-05,
"loss": 5.5905,
"step": 23000
},
{
"epoch": 6.28,
"learning_rate": 4.9546273876782353e-05,
"loss": 5.5882,
"step": 23500
},
{
"epoch": 6.41,
"learning_rate": 4.952949287059457e-05,
"loss": 5.5835,
"step": 24000
},
{
"epoch": 6.55,
"learning_rate": 4.951267823513586e-05,
"loss": 5.5795,
"step": 24500
},
{
"epoch": 6.68,
"learning_rate": 4.949586359967716e-05,
"loss": 5.5758,
"step": 25000
},
{
"epoch": 6.81,
"learning_rate": 4.947904896421846e-05,
"loss": 5.5759,
"step": 25500
},
{
"epoch": 6.95,
"learning_rate": 4.9462234328759754e-05,
"loss": 5.5725,
"step": 26000
},
{
"epoch": 7.08,
"learning_rate": 4.944541969330105e-05,
"loss": 5.5683,
"step": 26500
},
{
"epoch": 7.22,
"learning_rate": 4.9428605057842345e-05,
"loss": 5.5647,
"step": 27000
},
{
"epoch": 7.35,
"learning_rate": 4.9411790422383644e-05,
"loss": 5.5634,
"step": 27500
},
{
"epoch": 7.48,
"learning_rate": 4.939500941619586e-05,
"loss": 5.56,
"step": 28000
},
{
"epoch": 7.62,
"learning_rate": 4.937822841000808e-05,
"loss": 5.5573,
"step": 28500
},
{
"epoch": 7.75,
"learning_rate": 4.936141377454937e-05,
"loss": 5.5541,
"step": 29000
},
{
"epoch": 7.88,
"learning_rate": 4.934459913909067e-05,
"loss": 5.551,
"step": 29500
},
{
"epoch": 8.02,
"learning_rate": 4.932778450363196e-05,
"loss": 5.5479,
"step": 30000
},
{
"epoch": 8.15,
"learning_rate": 4.931096986817326e-05,
"loss": 5.5441,
"step": 30500
},
{
"epoch": 8.28,
"learning_rate": 4.9294155232714554e-05,
"loss": 5.5424,
"step": 31000
},
{
"epoch": 8.42,
"learning_rate": 4.927737422652677e-05,
"loss": 5.5381,
"step": 31500
},
{
"epoch": 8.55,
"learning_rate": 4.926055959106807e-05,
"loss": 5.5362,
"step": 32000
},
{
"epoch": 8.69,
"learning_rate": 4.924374495560937e-05,
"loss": 5.5352,
"step": 32500
},
{
"epoch": 8.82,
"learning_rate": 4.922693032015066e-05,
"loss": 5.5319,
"step": 33000
},
{
"epoch": 8.95,
"learning_rate": 4.921011568469196e-05,
"loss": 5.5338,
"step": 33500
},
{
"epoch": 9.09,
"learning_rate": 4.919330104923325e-05,
"loss": 5.5267,
"step": 34000
},
{
"epoch": 9.22,
"learning_rate": 4.917652004304547e-05,
"loss": 5.5255,
"step": 34500
},
{
"epoch": 9.35,
"learning_rate": 4.915970540758676e-05,
"loss": 5.5245,
"step": 35000
},
{
"epoch": 9.49,
"learning_rate": 4.914289077212806e-05,
"loss": 5.5187,
"step": 35500
},
{
"epoch": 9.62,
"learning_rate": 4.912607613666936e-05,
"loss": 5.5183,
"step": 36000
},
{
"epoch": 9.75,
"learning_rate": 4.910929513048158e-05,
"loss": 5.5174,
"step": 36500
},
{
"epoch": 9.89,
"learning_rate": 4.909248049502287e-05,
"loss": 5.5174,
"step": 37000
},
{
"epoch": 10.02,
"learning_rate": 4.907566585956417e-05,
"loss": 5.5139,
"step": 37500
},
{
"epoch": 10.15,
"learning_rate": 4.905885122410546e-05,
"loss": 5.5066,
"step": 38000
},
{
"epoch": 10.29,
"learning_rate": 4.904203658864676e-05,
"loss": 5.5077,
"step": 38500
},
{
"epoch": 10.42,
"learning_rate": 4.902522195318805e-05,
"loss": 5.5018,
"step": 39000
},
{
"epoch": 10.56,
"learning_rate": 4.900844094700027e-05,
"loss": 5.4997,
"step": 39500
},
{
"epoch": 10.69,
"learning_rate": 4.899162631154157e-05,
"loss": 5.3193,
"step": 40000
},
{
"epoch": 10.82,
"learning_rate": 4.897481167608287e-05,
"loss": 5.0833,
"step": 40500
},
{
"epoch": 10.96,
"learning_rate": 4.895799704062416e-05,
"loss": 4.8992,
"step": 41000
},
{
"epoch": 11.09,
"learning_rate": 4.894118240516546e-05,
"loss": 4.7196,
"step": 41500
},
{
"epoch": 11.22,
"learning_rate": 4.892436776970675e-05,
"loss": 4.552,
"step": 42000
},
{
"epoch": 11.36,
"learning_rate": 4.890755313424805e-05,
"loss": 4.3827,
"step": 42500
},
{
"epoch": 11.49,
"learning_rate": 4.889073849878935e-05,
"loss": 4.2208,
"step": 43000
},
{
"epoch": 11.62,
"learning_rate": 4.887392386333064e-05,
"loss": 4.0618,
"step": 43500
},
{
"epoch": 11.76,
"learning_rate": 4.885710922787194e-05,
"loss": 3.8934,
"step": 44000
},
{
"epoch": 11.89,
"learning_rate": 4.884029459241324e-05,
"loss": 3.6578,
"step": 44500
},
{
"epoch": 12.03,
"learning_rate": 4.882347995695454e-05,
"loss": 3.1504,
"step": 45000
},
{
"epoch": 12.16,
"learning_rate": 4.880669895076675e-05,
"loss": 2.52,
"step": 45500
},
{
"epoch": 12.29,
"learning_rate": 4.878991794457896e-05,
"loss": 2.2575,
"step": 46000
},
{
"epoch": 12.43,
"learning_rate": 4.877310330912026e-05,
"loss": 2.1152,
"step": 46500
},
{
"epoch": 12.56,
"learning_rate": 4.875632230293247e-05,
"loss": 2.0167,
"step": 47000
},
{
"epoch": 12.69,
"learning_rate": 4.873950766747377e-05,
"loss": 1.9382,
"step": 47500
},
{
"epoch": 12.83,
"learning_rate": 4.872269303201507e-05,
"loss": 1.8744,
"step": 48000
},
{
"epoch": 12.96,
"learning_rate": 4.870587839655637e-05,
"loss": 1.8223,
"step": 48500
},
{
"epoch": 13.09,
"learning_rate": 4.868906376109767e-05,
"loss": 1.773,
"step": 49000
},
{
"epoch": 13.23,
"learning_rate": 4.867224912563896e-05,
"loss": 1.7323,
"step": 49500
},
{
"epoch": 13.36,
"learning_rate": 4.8655468119451176e-05,
"loss": 1.6973,
"step": 50000
},
{
"epoch": 13.5,
"learning_rate": 4.863865348399247e-05,
"loss": 1.6658,
"step": 50500
},
{
"epoch": 13.63,
"learning_rate": 4.862183884853377e-05,
"loss": 1.6321,
"step": 51000
},
{
"epoch": 13.76,
"learning_rate": 4.860502421307506e-05,
"loss": 1.6099,
"step": 51500
},
{
"epoch": 13.9,
"learning_rate": 4.858820957761636e-05,
"loss": 1.5828,
"step": 52000
},
{
"epoch": 14.03,
"learning_rate": 4.857139494215766e-05,
"loss": 1.5617,
"step": 52500
},
{
"epoch": 14.16,
"learning_rate": 4.855458030669896e-05,
"loss": 1.539,
"step": 53000
},
{
"epoch": 14.3,
"learning_rate": 4.853779930051117e-05,
"loss": 1.5203,
"step": 53500
},
{
"epoch": 14.43,
"learning_rate": 4.852098466505247e-05,
"loss": 1.5006,
"step": 54000
},
{
"epoch": 14.56,
"learning_rate": 4.850417002959376e-05,
"loss": 1.4819,
"step": 54500
},
{
"epoch": 14.7,
"learning_rate": 4.848735539413506e-05,
"loss": 1.4656,
"step": 55000
},
{
"epoch": 14.83,
"learning_rate": 4.847054075867635e-05,
"loss": 1.4481,
"step": 55500
},
{
"epoch": 14.97,
"learning_rate": 4.845372612321765e-05,
"loss": 1.4359,
"step": 56000
},
{
"epoch": 15.1,
"learning_rate": 4.843691148775894e-05,
"loss": 1.4202,
"step": 56500
},
{
"epoch": 15.23,
"learning_rate": 4.842009685230024e-05,
"loss": 1.4077,
"step": 57000
},
{
"epoch": 15.37,
"learning_rate": 4.840328221684154e-05,
"loss": 1.3939,
"step": 57500
},
{
"epoch": 15.5,
"learning_rate": 4.838650121065376e-05,
"loss": 1.3803,
"step": 58000
},
{
"epoch": 15.63,
"learning_rate": 4.836968657519505e-05,
"loss": 1.37,
"step": 58500
},
{
"epoch": 15.77,
"learning_rate": 4.835287193973635e-05,
"loss": 1.3594,
"step": 59000
},
{
"epoch": 15.9,
"learning_rate": 4.833605730427764e-05,
"loss": 1.3471,
"step": 59500
},
{
"epoch": 16.03,
"learning_rate": 4.831924266881894e-05,
"loss": 1.3378,
"step": 60000
},
{
"epoch": 16.17,
"learning_rate": 4.8302428033360234e-05,
"loss": 1.3292,
"step": 60500
},
{
"epoch": 16.3,
"learning_rate": 4.828564702717246e-05,
"loss": 1.317,
"step": 61000
},
{
"epoch": 16.44,
"learning_rate": 4.826883239171375e-05,
"loss": 1.3105,
"step": 61500
},
{
"epoch": 16.57,
"learning_rate": 4.825201775625505e-05,
"loss": 1.3016,
"step": 62000
},
{
"epoch": 16.7,
"learning_rate": 4.823520312079635e-05,
"loss": 1.2924,
"step": 62500
},
{
"epoch": 16.84,
"learning_rate": 4.821838848533764e-05,
"loss": 1.2859,
"step": 63000
},
{
"epoch": 16.97,
"learning_rate": 4.820160747914985e-05,
"loss": 1.2765,
"step": 63500
},
{
"epoch": 17.1,
"learning_rate": 4.818482647296207e-05,
"loss": 1.2693,
"step": 64000
},
{
"epoch": 17.24,
"learning_rate": 4.8168011837503366e-05,
"loss": 1.26,
"step": 64500
},
{
"epoch": 17.37,
"learning_rate": 4.8151197202044665e-05,
"loss": 1.2559,
"step": 65000
},
{
"epoch": 17.5,
"learning_rate": 4.813438256658596e-05,
"loss": 1.2461,
"step": 65500
},
{
"epoch": 17.64,
"learning_rate": 4.811756793112726e-05,
"loss": 1.2371,
"step": 66000
},
{
"epoch": 17.77,
"learning_rate": 4.8100753295668556e-05,
"loss": 1.2331,
"step": 66500
},
{
"epoch": 17.9,
"learning_rate": 4.808393866020985e-05,
"loss": 1.2268,
"step": 67000
},
{
"epoch": 18.04,
"learning_rate": 4.8067157654022065e-05,
"loss": 1.2191,
"step": 67500
},
{
"epoch": 18.17,
"learning_rate": 4.805034301856336e-05,
"loss": 1.2137,
"step": 68000
},
{
"epoch": 18.31,
"learning_rate": 4.803352838310466e-05,
"loss": 1.2077,
"step": 68500
},
{
"epoch": 18.44,
"learning_rate": 4.801671374764595e-05,
"loss": 1.2037,
"step": 69000
},
{
"epoch": 18.57,
"learning_rate": 4.799993274145817e-05,
"loss": 1.1989,
"step": 69500
},
{
"epoch": 18.71,
"learning_rate": 4.7983118105999466e-05,
"loss": 1.191,
"step": 70000
},
{
"epoch": 18.84,
"learning_rate": 4.7966303470540765e-05,
"loss": 1.1843,
"step": 70500
},
{
"epoch": 18.97,
"learning_rate": 4.794948883508206e-05,
"loss": 1.1815,
"step": 71000
},
{
"epoch": 19.11,
"learning_rate": 4.7932707828894274e-05,
"loss": 1.1736,
"step": 71500
},
{
"epoch": 19.24,
"learning_rate": 4.7915893193435566e-05,
"loss": 1.1709,
"step": 72000
},
{
"epoch": 19.37,
"learning_rate": 4.7899078557976866e-05,
"loss": 1.1651,
"step": 72500
},
{
"epoch": 19.51,
"learning_rate": 4.788226392251816e-05,
"loss": 1.1602,
"step": 73000
},
{
"epoch": 19.64,
"learning_rate": 4.786548291633038e-05,
"loss": 1.1588,
"step": 73500
},
{
"epoch": 19.78,
"learning_rate": 4.7848668280871674e-05,
"loss": 1.1525,
"step": 74000
},
{
"epoch": 19.91,
"learning_rate": 4.783185364541297e-05,
"loss": 1.1486,
"step": 74500
},
{
"epoch": 20.04,
"learning_rate": 4.7815039009954266e-05,
"loss": 1.1437,
"step": 75000
},
{
"epoch": 20.18,
"learning_rate": 4.779825800376648e-05,
"loss": 1.1383,
"step": 75500
},
{
"epoch": 20.31,
"learning_rate": 4.7781443368307775e-05,
"loss": 1.1339,
"step": 76000
},
{
"epoch": 20.44,
"learning_rate": 4.776466236211999e-05,
"loss": 1.1319,
"step": 76500
},
{
"epoch": 20.58,
"learning_rate": 4.774784772666129e-05,
"loss": 1.1266,
"step": 77000
},
{
"epoch": 20.71,
"learning_rate": 4.773103309120259e-05,
"loss": 1.1244,
"step": 77500
},
{
"epoch": 20.84,
"learning_rate": 4.771421845574388e-05,
"loss": 1.1191,
"step": 78000
},
{
"epoch": 20.98,
"learning_rate": 4.769740382028518e-05,
"loss": 1.1156,
"step": 78500
},
{
"epoch": 21.11,
"learning_rate": 4.768062281409739e-05,
"loss": 1.1107,
"step": 79000
},
{
"epoch": 21.25,
"learning_rate": 4.766380817863869e-05,
"loss": 1.1068,
"step": 79500
},
{
"epoch": 21.38,
"learning_rate": 4.764699354317998e-05,
"loss": 1.1029,
"step": 80000
},
{
"epoch": 21.51,
"learning_rate": 4.763017890772128e-05,
"loss": 1.0994,
"step": 80500
},
{
"epoch": 21.65,
"learning_rate": 4.761336427226258e-05,
"loss": 1.0979,
"step": 81000
},
{
"epoch": 21.78,
"learning_rate": 4.759654963680388e-05,
"loss": 1.0913,
"step": 81500
},
{
"epoch": 21.91,
"learning_rate": 4.757976863061609e-05,
"loss": 1.09,
"step": 82000
},
{
"epoch": 22.05,
"learning_rate": 4.756295399515739e-05,
"loss": 1.0872,
"step": 82500
},
{
"epoch": 22.18,
"learning_rate": 4.754613935969868e-05,
"loss": 1.0837,
"step": 83000
},
{
"epoch": 22.31,
"learning_rate": 4.752932472423998e-05,
"loss": 1.0798,
"step": 83500
},
{
"epoch": 22.45,
"learning_rate": 4.7512510088781274e-05,
"loss": 1.0777,
"step": 84000
},
{
"epoch": 22.58,
"learning_rate": 4.749569545332257e-05,
"loss": 1.0765,
"step": 84500
},
{
"epoch": 22.72,
"learning_rate": 4.7478880817863866e-05,
"loss": 1.0731,
"step": 85000
},
{
"epoch": 22.85,
"learning_rate": 4.746209981167609e-05,
"loss": 1.0673,
"step": 85500
},
{
"epoch": 22.98,
"learning_rate": 4.744528517621738e-05,
"loss": 1.0655,
"step": 86000
},
{
"epoch": 23.12,
"learning_rate": 4.742847054075868e-05,
"loss": 1.0623,
"step": 86500
},
{
"epoch": 23.25,
"learning_rate": 4.741165590529997e-05,
"loss": 1.0599,
"step": 87000
},
{
"epoch": 23.38,
"learning_rate": 4.739487489911219e-05,
"loss": 1.0587,
"step": 87500
},
{
"epoch": 23.52,
"learning_rate": 4.737806026365348e-05,
"loss": 1.0544,
"step": 88000
},
{
"epoch": 23.65,
"learning_rate": 4.736124562819478e-05,
"loss": 1.0527,
"step": 88500
},
{
"epoch": 23.78,
"learning_rate": 4.7344430992736074e-05,
"loss": 1.0489,
"step": 89000
},
{
"epoch": 23.92,
"learning_rate": 4.73276499865483e-05,
"loss": 1.0463,
"step": 89500
},
{
"epoch": 24.05,
"learning_rate": 4.731083535108959e-05,
"loss": 1.0447,
"step": 90000
},
{
"epoch": 24.18,
"learning_rate": 4.729402071563089e-05,
"loss": 1.0401,
"step": 90500
},
{
"epoch": 24.32,
"learning_rate": 4.727720608017218e-05,
"loss": 1.04,
"step": 91000
},
{
"epoch": 24.45,
"learning_rate": 4.726039144471348e-05,
"loss": 1.0371,
"step": 91500
},
{
"epoch": 24.59,
"learning_rate": 4.724364406779661e-05,
"loss": 1.0343,
"step": 92000
},
{
"epoch": 24.72,
"learning_rate": 4.722682943233791e-05,
"loss": 1.0336,
"step": 92500
},
{
"epoch": 24.85,
"learning_rate": 4.721001479687921e-05,
"loss": 1.0307,
"step": 93000
},
{
"epoch": 24.99,
"learning_rate": 4.7193200161420506e-05,
"loss": 1.0294,
"step": 93500
},
{
"epoch": 25.12,
"learning_rate": 4.71763855259618e-05,
"loss": 1.0243,
"step": 94000
},
{
"epoch": 25.25,
"learning_rate": 4.71595708905031e-05,
"loss": 1.0225,
"step": 94500
},
{
"epoch": 25.39,
"learning_rate": 4.714275625504439e-05,
"loss": 1.0205,
"step": 95000
},
{
"epoch": 25.52,
"learning_rate": 4.712594161958569e-05,
"loss": 1.0201,
"step": 95500
},
{
"epoch": 25.65,
"learning_rate": 4.710912698412699e-05,
"loss": 1.0171,
"step": 96000
},
{
"epoch": 25.79,
"learning_rate": 4.7092379607210116e-05,
"loss": 1.0139,
"step": 96500
},
{
"epoch": 25.92,
"learning_rate": 4.7075564971751415e-05,
"loss": 1.0131,
"step": 97000
},
{
"epoch": 26.06,
"learning_rate": 4.7058750336292714e-05,
"loss": 1.0114,
"step": 97500
},
{
"epoch": 26.19,
"learning_rate": 4.704193570083401e-05,
"loss": 1.0082,
"step": 98000
},
{
"epoch": 26.32,
"learning_rate": 4.7025121065375306e-05,
"loss": 1.0081,
"step": 98500
},
{
"epoch": 26.46,
"learning_rate": 4.7008306429916605e-05,
"loss": 1.0042,
"step": 99000
},
{
"epoch": 26.59,
"learning_rate": 4.69914917944579e-05,
"loss": 1.0034,
"step": 99500
},
{
"epoch": 26.72,
"learning_rate": 4.697471078827011e-05,
"loss": 1.0016,
"step": 100000
},
{
"epoch": 26.86,
"learning_rate": 4.695789615281141e-05,
"loss": 0.9991,
"step": 100500
},
{
"epoch": 26.99,
"learning_rate": 4.6941115146623624e-05,
"loss": 0.9963,
"step": 101000
}
],
"max_steps": 1496800,
"num_train_epochs": 400,
"total_flos": 2.722996359502024e+19,
"trial_name": null,
"trial_params": null
}