AryaSuprana's picture
First Commit
66f2d9e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 199.88412514484358,
"global_step": 690000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 4.996378910776362e-05,
"loss": 8.4547,
"step": 500
},
{
"epoch": 0.29,
"learning_rate": 4.9927578215527235e-05,
"loss": 7.8499,
"step": 1000
},
{
"epoch": 0.43,
"learning_rate": 4.989136732329085e-05,
"loss": 7.554,
"step": 1500
},
{
"epoch": 0.58,
"learning_rate": 4.985515643105446e-05,
"loss": 7.4955,
"step": 2000
},
{
"epoch": 0.72,
"learning_rate": 4.9818945538818076e-05,
"loss": 7.2532,
"step": 2500
},
{
"epoch": 0.87,
"learning_rate": 4.97827346465817e-05,
"loss": 7.2584,
"step": 3000
},
{
"epoch": 1.01,
"learning_rate": 4.974652375434531e-05,
"loss": 7.175,
"step": 3500
},
{
"epoch": 1.16,
"learning_rate": 4.9710312862108924e-05,
"loss": 6.9372,
"step": 4000
},
{
"epoch": 1.3,
"learning_rate": 4.967410196987254e-05,
"loss": 6.9367,
"step": 4500
},
{
"epoch": 1.45,
"learning_rate": 4.9637891077636157e-05,
"loss": 6.7982,
"step": 5000
},
{
"epoch": 1.59,
"learning_rate": 4.9601680185399766e-05,
"loss": 6.9367,
"step": 5500
},
{
"epoch": 1.74,
"learning_rate": 4.956546929316339e-05,
"loss": 6.7646,
"step": 6000
},
{
"epoch": 1.88,
"learning_rate": 4.9529258400927005e-05,
"loss": 6.8283,
"step": 6500
},
{
"epoch": 2.03,
"learning_rate": 4.9493047508690614e-05,
"loss": 6.6735,
"step": 7000
},
{
"epoch": 2.17,
"learning_rate": 4.945683661645423e-05,
"loss": 6.5376,
"step": 7500
},
{
"epoch": 2.32,
"learning_rate": 4.9420625724217846e-05,
"loss": 6.4529,
"step": 8000
},
{
"epoch": 2.46,
"learning_rate": 4.938441483198146e-05,
"loss": 6.5368,
"step": 8500
},
{
"epoch": 2.61,
"learning_rate": 4.934820393974508e-05,
"loss": 6.4591,
"step": 9000
},
{
"epoch": 2.75,
"learning_rate": 4.9311993047508695e-05,
"loss": 6.313,
"step": 9500
},
{
"epoch": 2.9,
"learning_rate": 4.927578215527231e-05,
"loss": 6.4253,
"step": 10000
},
{
"epoch": 3.04,
"learning_rate": 4.923957126303592e-05,
"loss": 6.2994,
"step": 10500
},
{
"epoch": 3.19,
"learning_rate": 4.9203360370799536e-05,
"loss": 6.1315,
"step": 11000
},
{
"epoch": 3.33,
"learning_rate": 4.916714947856315e-05,
"loss": 6.2794,
"step": 11500
},
{
"epoch": 3.48,
"learning_rate": 4.913093858632677e-05,
"loss": 6.2569,
"step": 12000
},
{
"epoch": 3.62,
"learning_rate": 4.9094727694090384e-05,
"loss": 6.1676,
"step": 12500
},
{
"epoch": 3.77,
"learning_rate": 4.9058516801854e-05,
"loss": 6.0854,
"step": 13000
},
{
"epoch": 3.91,
"learning_rate": 4.902230590961762e-05,
"loss": 6.1253,
"step": 13500
},
{
"epoch": 4.06,
"learning_rate": 4.8986095017381226e-05,
"loss": 6.1414,
"step": 14000
},
{
"epoch": 4.2,
"learning_rate": 4.894988412514485e-05,
"loss": 6.0679,
"step": 14500
},
{
"epoch": 4.35,
"learning_rate": 4.8913673232908465e-05,
"loss": 6.0058,
"step": 15000
},
{
"epoch": 4.49,
"learning_rate": 4.8877462340672074e-05,
"loss": 6.0498,
"step": 15500
},
{
"epoch": 4.63,
"learning_rate": 4.884125144843569e-05,
"loss": 5.9609,
"step": 16000
},
{
"epoch": 4.78,
"learning_rate": 4.8805040556199306e-05,
"loss": 5.9032,
"step": 16500
},
{
"epoch": 4.92,
"learning_rate": 4.876882966396292e-05,
"loss": 5.7145,
"step": 17000
},
{
"epoch": 5.07,
"learning_rate": 4.873261877172654e-05,
"loss": 5.8655,
"step": 17500
},
{
"epoch": 5.21,
"learning_rate": 4.8696407879490155e-05,
"loss": 5.7881,
"step": 18000
},
{
"epoch": 5.36,
"learning_rate": 4.866019698725377e-05,
"loss": 5.7224,
"step": 18500
},
{
"epoch": 5.5,
"learning_rate": 4.862398609501738e-05,
"loss": 5.8037,
"step": 19000
},
{
"epoch": 5.65,
"learning_rate": 4.8587775202780996e-05,
"loss": 5.7771,
"step": 19500
},
{
"epoch": 5.79,
"learning_rate": 4.855156431054461e-05,
"loss": 5.6896,
"step": 20000
},
{
"epoch": 5.94,
"learning_rate": 4.851535341830823e-05,
"loss": 5.6381,
"step": 20500
},
{
"epoch": 6.08,
"learning_rate": 4.8479142526071845e-05,
"loss": 5.7065,
"step": 21000
},
{
"epoch": 6.23,
"learning_rate": 4.844293163383546e-05,
"loss": 5.6466,
"step": 21500
},
{
"epoch": 6.37,
"learning_rate": 4.840672074159908e-05,
"loss": 5.5202,
"step": 22000
},
{
"epoch": 6.52,
"learning_rate": 4.8370509849362686e-05,
"loss": 5.553,
"step": 22500
},
{
"epoch": 6.66,
"learning_rate": 4.833429895712631e-05,
"loss": 5.523,
"step": 23000
},
{
"epoch": 6.81,
"learning_rate": 4.8298088064889925e-05,
"loss": 5.4201,
"step": 23500
},
{
"epoch": 6.95,
"learning_rate": 4.8261877172653534e-05,
"loss": 5.5552,
"step": 24000
},
{
"epoch": 7.1,
"learning_rate": 4.822566628041715e-05,
"loss": 5.361,
"step": 24500
},
{
"epoch": 7.24,
"learning_rate": 4.8189455388180767e-05,
"loss": 5.4092,
"step": 25000
},
{
"epoch": 7.39,
"learning_rate": 4.815324449594438e-05,
"loss": 5.3235,
"step": 25500
},
{
"epoch": 7.53,
"learning_rate": 4.811703360370799e-05,
"loss": 5.4796,
"step": 26000
},
{
"epoch": 7.68,
"learning_rate": 4.8080822711471615e-05,
"loss": 5.3789,
"step": 26500
},
{
"epoch": 7.82,
"learning_rate": 4.804461181923523e-05,
"loss": 5.2428,
"step": 27000
},
{
"epoch": 7.97,
"learning_rate": 4.800840092699884e-05,
"loss": 5.4215,
"step": 27500
},
{
"epoch": 8.11,
"learning_rate": 4.797219003476246e-05,
"loss": 5.2112,
"step": 28000
},
{
"epoch": 8.26,
"learning_rate": 4.793597914252607e-05,
"loss": 5.4211,
"step": 28500
},
{
"epoch": 8.4,
"learning_rate": 4.789976825028969e-05,
"loss": 5.2253,
"step": 29000
},
{
"epoch": 8.55,
"learning_rate": 4.7863557358053305e-05,
"loss": 5.1749,
"step": 29500
},
{
"epoch": 8.69,
"learning_rate": 4.782734646581692e-05,
"loss": 5.121,
"step": 30000
},
{
"epoch": 8.84,
"learning_rate": 4.779113557358054e-05,
"loss": 5.1387,
"step": 30500
},
{
"epoch": 8.98,
"learning_rate": 4.7754924681344146e-05,
"loss": 5.198,
"step": 31000
},
{
"epoch": 9.13,
"learning_rate": 4.771871378910777e-05,
"loss": 4.9978,
"step": 31500
},
{
"epoch": 9.27,
"learning_rate": 4.7682502896871385e-05,
"loss": 4.9791,
"step": 32000
},
{
"epoch": 9.41,
"learning_rate": 4.7646292004634994e-05,
"loss": 4.9845,
"step": 32500
},
{
"epoch": 9.56,
"learning_rate": 4.761008111239861e-05,
"loss": 4.8999,
"step": 33000
},
{
"epoch": 9.7,
"learning_rate": 4.7573870220162227e-05,
"loss": 4.9872,
"step": 33500
},
{
"epoch": 9.85,
"learning_rate": 4.753765932792584e-05,
"loss": 5.0741,
"step": 34000
},
{
"epoch": 9.99,
"learning_rate": 4.750144843568945e-05,
"loss": 5.0205,
"step": 34500
},
{
"epoch": 10.14,
"learning_rate": 4.7465237543453075e-05,
"loss": 4.7512,
"step": 35000
},
{
"epoch": 10.28,
"learning_rate": 4.742902665121669e-05,
"loss": 4.9417,
"step": 35500
},
{
"epoch": 10.43,
"learning_rate": 4.73928157589803e-05,
"loss": 4.9746,
"step": 36000
},
{
"epoch": 10.57,
"learning_rate": 4.735660486674392e-05,
"loss": 4.747,
"step": 36500
},
{
"epoch": 10.72,
"learning_rate": 4.732039397450753e-05,
"loss": 4.6858,
"step": 37000
},
{
"epoch": 10.86,
"learning_rate": 4.728418308227115e-05,
"loss": 4.8208,
"step": 37500
},
{
"epoch": 11.01,
"learning_rate": 4.7247972190034765e-05,
"loss": 4.7271,
"step": 38000
},
{
"epoch": 11.15,
"learning_rate": 4.721176129779838e-05,
"loss": 4.6115,
"step": 38500
},
{
"epoch": 11.3,
"learning_rate": 4.7175550405562e-05,
"loss": 4.7304,
"step": 39000
},
{
"epoch": 11.44,
"learning_rate": 4.7139339513325606e-05,
"loss": 4.4773,
"step": 39500
},
{
"epoch": 11.59,
"learning_rate": 4.710312862108923e-05,
"loss": 4.6107,
"step": 40000
},
{
"epoch": 11.73,
"learning_rate": 4.706691772885284e-05,
"loss": 4.5442,
"step": 40500
},
{
"epoch": 11.88,
"learning_rate": 4.7030706836616454e-05,
"loss": 4.744,
"step": 41000
},
{
"epoch": 12.02,
"learning_rate": 4.699449594438007e-05,
"loss": 4.6054,
"step": 41500
},
{
"epoch": 12.17,
"learning_rate": 4.695828505214369e-05,
"loss": 4.6134,
"step": 42000
},
{
"epoch": 12.31,
"learning_rate": 4.69220741599073e-05,
"loss": 4.578,
"step": 42500
},
{
"epoch": 12.46,
"learning_rate": 4.688586326767091e-05,
"loss": 4.4292,
"step": 43000
},
{
"epoch": 12.6,
"learning_rate": 4.6849652375434535e-05,
"loss": 4.364,
"step": 43500
},
{
"epoch": 12.75,
"learning_rate": 4.681344148319815e-05,
"loss": 4.3788,
"step": 44000
},
{
"epoch": 12.89,
"learning_rate": 4.677723059096176e-05,
"loss": 4.4367,
"step": 44500
},
{
"epoch": 13.04,
"learning_rate": 4.674101969872538e-05,
"loss": 4.3541,
"step": 45000
},
{
"epoch": 13.18,
"learning_rate": 4.670480880648899e-05,
"loss": 4.2888,
"step": 45500
},
{
"epoch": 13.33,
"learning_rate": 4.666859791425261e-05,
"loss": 4.387,
"step": 46000
},
{
"epoch": 13.47,
"learning_rate": 4.6632387022016225e-05,
"loss": 4.3504,
"step": 46500
},
{
"epoch": 13.62,
"learning_rate": 4.659617612977984e-05,
"loss": 4.2987,
"step": 47000
},
{
"epoch": 13.76,
"learning_rate": 4.655996523754346e-05,
"loss": 4.3652,
"step": 47500
},
{
"epoch": 13.9,
"learning_rate": 4.6523754345307066e-05,
"loss": 4.4629,
"step": 48000
},
{
"epoch": 14.05,
"learning_rate": 4.648754345307069e-05,
"loss": 4.2083,
"step": 48500
},
{
"epoch": 14.19,
"learning_rate": 4.64513325608343e-05,
"loss": 4.2904,
"step": 49000
},
{
"epoch": 14.34,
"learning_rate": 4.6415121668597915e-05,
"loss": 4.2137,
"step": 49500
},
{
"epoch": 14.48,
"learning_rate": 4.637891077636153e-05,
"loss": 4.2921,
"step": 50000
},
{
"epoch": 14.63,
"learning_rate": 4.634269988412515e-05,
"loss": 4.2011,
"step": 50500
},
{
"epoch": 14.77,
"learning_rate": 4.630648899188876e-05,
"loss": 4.1971,
"step": 51000
},
{
"epoch": 14.92,
"learning_rate": 4.627027809965237e-05,
"loss": 4.2372,
"step": 51500
},
{
"epoch": 15.06,
"learning_rate": 4.6234067207415995e-05,
"loss": 4.1604,
"step": 52000
},
{
"epoch": 15.21,
"learning_rate": 4.619785631517961e-05,
"loss": 4.0747,
"step": 52500
},
{
"epoch": 15.35,
"learning_rate": 4.616164542294322e-05,
"loss": 4.1311,
"step": 53000
},
{
"epoch": 15.5,
"learning_rate": 4.612543453070684e-05,
"loss": 4.1403,
"step": 53500
},
{
"epoch": 15.64,
"learning_rate": 4.608922363847045e-05,
"loss": 4.2004,
"step": 54000
},
{
"epoch": 15.79,
"learning_rate": 4.605301274623407e-05,
"loss": 4.1078,
"step": 54500
},
{
"epoch": 15.93,
"learning_rate": 4.6016801853997685e-05,
"loss": 4.1072,
"step": 55000
},
{
"epoch": 16.08,
"learning_rate": 4.59805909617613e-05,
"loss": 4.1097,
"step": 55500
},
{
"epoch": 16.22,
"learning_rate": 4.594438006952492e-05,
"loss": 3.901,
"step": 56000
},
{
"epoch": 16.37,
"learning_rate": 4.5908169177288526e-05,
"loss": 4.0599,
"step": 56500
},
{
"epoch": 16.51,
"learning_rate": 4.587195828505215e-05,
"loss": 4.0458,
"step": 57000
},
{
"epoch": 16.66,
"learning_rate": 4.583574739281576e-05,
"loss": 4.0344,
"step": 57500
},
{
"epoch": 16.8,
"learning_rate": 4.5799536500579375e-05,
"loss": 3.9669,
"step": 58000
},
{
"epoch": 16.95,
"learning_rate": 4.576332560834299e-05,
"loss": 4.0911,
"step": 58500
},
{
"epoch": 17.09,
"learning_rate": 4.572711471610661e-05,
"loss": 4.0642,
"step": 59000
},
{
"epoch": 17.24,
"learning_rate": 4.569090382387022e-05,
"loss": 3.9691,
"step": 59500
},
{
"epoch": 17.38,
"learning_rate": 4.565469293163384e-05,
"loss": 3.8211,
"step": 60000
},
{
"epoch": 17.53,
"learning_rate": 4.5618482039397455e-05,
"loss": 3.8802,
"step": 60500
},
{
"epoch": 17.67,
"learning_rate": 4.558227114716107e-05,
"loss": 3.9179,
"step": 61000
},
{
"epoch": 17.82,
"learning_rate": 4.554606025492468e-05,
"loss": 3.9943,
"step": 61500
},
{
"epoch": 17.96,
"learning_rate": 4.55098493626883e-05,
"loss": 3.9248,
"step": 62000
},
{
"epoch": 18.11,
"learning_rate": 4.547363847045191e-05,
"loss": 3.8573,
"step": 62500
},
{
"epoch": 18.25,
"learning_rate": 4.543742757821553e-05,
"loss": 3.8063,
"step": 63000
},
{
"epoch": 18.4,
"learning_rate": 4.5401216685979145e-05,
"loss": 3.7963,
"step": 63500
},
{
"epoch": 18.54,
"learning_rate": 4.536500579374276e-05,
"loss": 3.923,
"step": 64000
},
{
"epoch": 18.68,
"learning_rate": 4.532879490150638e-05,
"loss": 3.817,
"step": 64500
},
{
"epoch": 18.83,
"learning_rate": 4.5292584009269986e-05,
"loss": 3.8602,
"step": 65000
},
{
"epoch": 18.97,
"learning_rate": 4.525637311703361e-05,
"loss": 3.6989,
"step": 65500
},
{
"epoch": 19.12,
"learning_rate": 4.522016222479722e-05,
"loss": 3.7462,
"step": 66000
},
{
"epoch": 19.26,
"learning_rate": 4.5183951332560835e-05,
"loss": 3.7723,
"step": 66500
},
{
"epoch": 19.41,
"learning_rate": 4.514774044032445e-05,
"loss": 3.8287,
"step": 67000
},
{
"epoch": 19.55,
"learning_rate": 4.511152954808807e-05,
"loss": 3.7813,
"step": 67500
},
{
"epoch": 19.7,
"learning_rate": 4.507531865585168e-05,
"loss": 3.893,
"step": 68000
},
{
"epoch": 19.84,
"learning_rate": 4.50391077636153e-05,
"loss": 3.8048,
"step": 68500
},
{
"epoch": 19.99,
"learning_rate": 4.5002896871378915e-05,
"loss": 3.7366,
"step": 69000
},
{
"epoch": 20.13,
"learning_rate": 4.4966685979142524e-05,
"loss": 3.6066,
"step": 69500
},
{
"epoch": 20.28,
"learning_rate": 4.493047508690614e-05,
"loss": 3.6848,
"step": 70000
},
{
"epoch": 20.42,
"learning_rate": 4.4894264194669763e-05,
"loss": 3.7469,
"step": 70500
},
{
"epoch": 20.57,
"learning_rate": 4.485805330243337e-05,
"loss": 3.7087,
"step": 71000
},
{
"epoch": 20.71,
"learning_rate": 4.482184241019699e-05,
"loss": 3.7463,
"step": 71500
},
{
"epoch": 20.86,
"learning_rate": 4.4785631517960605e-05,
"loss": 3.6861,
"step": 72000
},
{
"epoch": 21.0,
"learning_rate": 4.474942062572422e-05,
"loss": 3.7919,
"step": 72500
},
{
"epoch": 21.15,
"learning_rate": 4.471320973348784e-05,
"loss": 3.6995,
"step": 73000
},
{
"epoch": 21.29,
"learning_rate": 4.4676998841251446e-05,
"loss": 3.5774,
"step": 73500
},
{
"epoch": 21.44,
"learning_rate": 4.464078794901507e-05,
"loss": 3.6384,
"step": 74000
},
{
"epoch": 21.58,
"learning_rate": 4.460457705677868e-05,
"loss": 3.7398,
"step": 74500
},
{
"epoch": 21.73,
"learning_rate": 4.4568366164542295e-05,
"loss": 3.6163,
"step": 75000
},
{
"epoch": 21.87,
"learning_rate": 4.453215527230592e-05,
"loss": 3.6542,
"step": 75500
},
{
"epoch": 22.02,
"learning_rate": 4.449594438006953e-05,
"loss": 3.5554,
"step": 76000
},
{
"epoch": 22.16,
"learning_rate": 4.445973348783314e-05,
"loss": 3.4238,
"step": 76500
},
{
"epoch": 22.31,
"learning_rate": 4.442352259559676e-05,
"loss": 3.5048,
"step": 77000
},
{
"epoch": 22.45,
"learning_rate": 4.4387311703360375e-05,
"loss": 3.3922,
"step": 77500
},
{
"epoch": 22.6,
"learning_rate": 4.4351100811123985e-05,
"loss": 3.5757,
"step": 78000
},
{
"epoch": 22.74,
"learning_rate": 4.43148899188876e-05,
"loss": 3.6146,
"step": 78500
},
{
"epoch": 22.89,
"learning_rate": 4.4278679026651224e-05,
"loss": 3.4844,
"step": 79000
},
{
"epoch": 23.03,
"learning_rate": 4.424246813441483e-05,
"loss": 3.5872,
"step": 79500
},
{
"epoch": 23.17,
"learning_rate": 4.420625724217845e-05,
"loss": 3.336,
"step": 80000
},
{
"epoch": 23.32,
"learning_rate": 4.4170046349942065e-05,
"loss": 3.4509,
"step": 80500
},
{
"epoch": 23.46,
"learning_rate": 4.413383545770568e-05,
"loss": 3.5564,
"step": 81000
},
{
"epoch": 23.61,
"learning_rate": 4.40976245654693e-05,
"loss": 3.4449,
"step": 81500
},
{
"epoch": 23.75,
"learning_rate": 4.4061413673232907e-05,
"loss": 3.4913,
"step": 82000
},
{
"epoch": 23.9,
"learning_rate": 4.402520278099653e-05,
"loss": 3.6125,
"step": 82500
},
{
"epoch": 24.04,
"learning_rate": 4.398899188876014e-05,
"loss": 3.3634,
"step": 83000
},
{
"epoch": 24.19,
"learning_rate": 4.3952780996523755e-05,
"loss": 3.5246,
"step": 83500
},
{
"epoch": 24.33,
"learning_rate": 4.391657010428738e-05,
"loss": 3.3986,
"step": 84000
},
{
"epoch": 24.48,
"learning_rate": 4.388035921205099e-05,
"loss": 3.4707,
"step": 84500
},
{
"epoch": 24.62,
"learning_rate": 4.38441483198146e-05,
"loss": 3.4469,
"step": 85000
},
{
"epoch": 24.77,
"learning_rate": 4.380793742757822e-05,
"loss": 3.3893,
"step": 85500
},
{
"epoch": 24.91,
"learning_rate": 4.3771726535341835e-05,
"loss": 3.3876,
"step": 86000
},
{
"epoch": 25.06,
"learning_rate": 4.3735515643105445e-05,
"loss": 3.3913,
"step": 86500
},
{
"epoch": 25.2,
"learning_rate": 4.369930475086906e-05,
"loss": 3.472,
"step": 87000
},
{
"epoch": 25.35,
"learning_rate": 4.3663093858632684e-05,
"loss": 3.2559,
"step": 87500
},
{
"epoch": 25.49,
"learning_rate": 4.362688296639629e-05,
"loss": 3.461,
"step": 88000
},
{
"epoch": 25.64,
"learning_rate": 4.359067207415991e-05,
"loss": 3.3586,
"step": 88500
},
{
"epoch": 25.78,
"learning_rate": 4.3554461181923525e-05,
"loss": 3.2862,
"step": 89000
},
{
"epoch": 25.93,
"learning_rate": 4.351825028968714e-05,
"loss": 3.2737,
"step": 89500
},
{
"epoch": 26.07,
"learning_rate": 4.348203939745076e-05,
"loss": 3.2978,
"step": 90000
},
{
"epoch": 26.22,
"learning_rate": 4.344582850521437e-05,
"loss": 3.2151,
"step": 90500
},
{
"epoch": 26.36,
"learning_rate": 4.340961761297799e-05,
"loss": 3.339,
"step": 91000
},
{
"epoch": 26.51,
"learning_rate": 4.33734067207416e-05,
"loss": 3.2553,
"step": 91500
},
{
"epoch": 26.65,
"learning_rate": 4.3337195828505215e-05,
"loss": 3.1708,
"step": 92000
},
{
"epoch": 26.8,
"learning_rate": 4.330098493626883e-05,
"loss": 3.2419,
"step": 92500
},
{
"epoch": 26.94,
"learning_rate": 4.326477404403245e-05,
"loss": 3.3119,
"step": 93000
},
{
"epoch": 27.09,
"learning_rate": 4.322856315179606e-05,
"loss": 3.1761,
"step": 93500
},
{
"epoch": 27.23,
"learning_rate": 4.319235225955968e-05,
"loss": 3.0938,
"step": 94000
},
{
"epoch": 27.38,
"learning_rate": 4.3156141367323295e-05,
"loss": 3.2,
"step": 94500
},
{
"epoch": 27.52,
"learning_rate": 4.3119930475086905e-05,
"loss": 3.2952,
"step": 95000
},
{
"epoch": 27.67,
"learning_rate": 4.308371958285052e-05,
"loss": 3.2587,
"step": 95500
},
{
"epoch": 27.81,
"learning_rate": 4.3047508690614144e-05,
"loss": 3.1839,
"step": 96000
},
{
"epoch": 27.95,
"learning_rate": 4.301129779837775e-05,
"loss": 3.2991,
"step": 96500
},
{
"epoch": 28.1,
"learning_rate": 4.297508690614137e-05,
"loss": 3.2873,
"step": 97000
},
{
"epoch": 28.24,
"learning_rate": 4.2938876013904985e-05,
"loss": 3.2506,
"step": 97500
},
{
"epoch": 28.39,
"learning_rate": 4.29026651216686e-05,
"loss": 3.0849,
"step": 98000
},
{
"epoch": 28.53,
"learning_rate": 4.286645422943222e-05,
"loss": 3.1321,
"step": 98500
},
{
"epoch": 28.68,
"learning_rate": 4.283024333719583e-05,
"loss": 3.0935,
"step": 99000
},
{
"epoch": 28.82,
"learning_rate": 4.279403244495945e-05,
"loss": 3.1311,
"step": 99500
},
{
"epoch": 28.97,
"learning_rate": 4.275782155272306e-05,
"loss": 3.0079,
"step": 100000
},
{
"epoch": 29.11,
"learning_rate": 4.2721610660486675e-05,
"loss": 3.0359,
"step": 100500
},
{
"epoch": 29.26,
"learning_rate": 4.268539976825029e-05,
"loss": 3.1054,
"step": 101000
},
{
"epoch": 29.4,
"learning_rate": 4.264918887601391e-05,
"loss": 2.9851,
"step": 101500
},
{
"epoch": 29.55,
"learning_rate": 4.261297798377752e-05,
"loss": 3.0301,
"step": 102000
},
{
"epoch": 29.69,
"learning_rate": 4.257676709154114e-05,
"loss": 3.0815,
"step": 102500
},
{
"epoch": 29.84,
"learning_rate": 4.2540556199304755e-05,
"loss": 3.0711,
"step": 103000
},
{
"epoch": 29.98,
"learning_rate": 4.2504345307068365e-05,
"loss": 3.2656,
"step": 103500
},
{
"epoch": 30.13,
"learning_rate": 4.246813441483198e-05,
"loss": 2.9655,
"step": 104000
},
{
"epoch": 30.27,
"learning_rate": 4.2431923522595604e-05,
"loss": 2.9877,
"step": 104500
},
{
"epoch": 30.42,
"learning_rate": 4.239571263035921e-05,
"loss": 3.1679,
"step": 105000
},
{
"epoch": 30.56,
"learning_rate": 4.235950173812283e-05,
"loss": 2.9898,
"step": 105500
},
{
"epoch": 30.71,
"learning_rate": 4.2323290845886445e-05,
"loss": 3.0315,
"step": 106000
},
{
"epoch": 30.85,
"learning_rate": 4.228707995365006e-05,
"loss": 2.9994,
"step": 106500
},
{
"epoch": 31.0,
"learning_rate": 4.225086906141367e-05,
"loss": 3.0117,
"step": 107000
},
{
"epoch": 31.14,
"learning_rate": 4.2214658169177294e-05,
"loss": 2.9706,
"step": 107500
},
{
"epoch": 31.29,
"learning_rate": 4.217844727694091e-05,
"loss": 2.8996,
"step": 108000
},
{
"epoch": 31.43,
"learning_rate": 4.214223638470452e-05,
"loss": 2.8631,
"step": 108500
},
{
"epoch": 31.58,
"learning_rate": 4.2106025492468135e-05,
"loss": 3.0409,
"step": 109000
},
{
"epoch": 31.72,
"learning_rate": 4.206981460023175e-05,
"loss": 2.9536,
"step": 109500
},
{
"epoch": 31.87,
"learning_rate": 4.203360370799537e-05,
"loss": 2.9889,
"step": 110000
},
{
"epoch": 32.01,
"learning_rate": 4.199739281575898e-05,
"loss": 2.9418,
"step": 110500
},
{
"epoch": 32.16,
"learning_rate": 4.19611819235226e-05,
"loss": 2.9762,
"step": 111000
},
{
"epoch": 32.3,
"learning_rate": 4.1924971031286216e-05,
"loss": 2.8923,
"step": 111500
},
{
"epoch": 32.44,
"learning_rate": 4.1888760139049825e-05,
"loss": 2.8679,
"step": 112000
},
{
"epoch": 32.59,
"learning_rate": 4.185254924681344e-05,
"loss": 3.0042,
"step": 112500
},
{
"epoch": 32.73,
"learning_rate": 4.1816338354577064e-05,
"loss": 2.7931,
"step": 113000
},
{
"epoch": 32.88,
"learning_rate": 4.178012746234067e-05,
"loss": 3.0075,
"step": 113500
},
{
"epoch": 33.02,
"learning_rate": 4.174391657010429e-05,
"loss": 2.9121,
"step": 114000
},
{
"epoch": 33.17,
"learning_rate": 4.1707705677867905e-05,
"loss": 2.8189,
"step": 114500
},
{
"epoch": 33.31,
"learning_rate": 4.167149478563152e-05,
"loss": 2.7948,
"step": 115000
},
{
"epoch": 33.46,
"learning_rate": 4.163528389339513e-05,
"loss": 2.8265,
"step": 115500
},
{
"epoch": 33.6,
"learning_rate": 4.1599073001158754e-05,
"loss": 2.934,
"step": 116000
},
{
"epoch": 33.75,
"learning_rate": 4.156286210892237e-05,
"loss": 2.8438,
"step": 116500
},
{
"epoch": 33.89,
"learning_rate": 4.152665121668598e-05,
"loss": 2.9726,
"step": 117000
},
{
"epoch": 34.04,
"learning_rate": 4.1490440324449595e-05,
"loss": 2.7706,
"step": 117500
},
{
"epoch": 34.18,
"learning_rate": 4.145422943221321e-05,
"loss": 2.7422,
"step": 118000
},
{
"epoch": 34.33,
"learning_rate": 4.141801853997683e-05,
"loss": 2.7464,
"step": 118500
},
{
"epoch": 34.47,
"learning_rate": 4.1381807647740443e-05,
"loss": 2.842,
"step": 119000
},
{
"epoch": 34.62,
"learning_rate": 4.134559675550406e-05,
"loss": 2.7679,
"step": 119500
},
{
"epoch": 34.76,
"learning_rate": 4.1309385863267676e-05,
"loss": 2.8504,
"step": 120000
},
{
"epoch": 34.91,
"learning_rate": 4.1273174971031285e-05,
"loss": 2.7849,
"step": 120500
},
{
"epoch": 35.05,
"learning_rate": 4.12369640787949e-05,
"loss": 2.7947,
"step": 121000
},
{
"epoch": 35.2,
"learning_rate": 4.120075318655852e-05,
"loss": 2.6207,
"step": 121500
},
{
"epoch": 35.34,
"learning_rate": 4.116454229432213e-05,
"loss": 2.7135,
"step": 122000
},
{
"epoch": 35.49,
"learning_rate": 4.112833140208575e-05,
"loss": 2.796,
"step": 122500
},
{
"epoch": 35.63,
"learning_rate": 4.1092120509849365e-05,
"loss": 2.7093,
"step": 123000
},
{
"epoch": 35.78,
"learning_rate": 4.105590961761298e-05,
"loss": 2.722,
"step": 123500
},
{
"epoch": 35.92,
"learning_rate": 4.101969872537659e-05,
"loss": 2.6685,
"step": 124000
},
{
"epoch": 36.07,
"learning_rate": 4.0983487833140214e-05,
"loss": 2.8914,
"step": 124500
},
{
"epoch": 36.21,
"learning_rate": 4.094727694090383e-05,
"loss": 2.7502,
"step": 125000
},
{
"epoch": 36.36,
"learning_rate": 4.091106604866744e-05,
"loss": 2.6001,
"step": 125500
},
{
"epoch": 36.5,
"learning_rate": 4.0874855156431055e-05,
"loss": 2.6482,
"step": 126000
},
{
"epoch": 36.65,
"learning_rate": 4.083864426419467e-05,
"loss": 2.5456,
"step": 126500
},
{
"epoch": 36.79,
"learning_rate": 4.080243337195829e-05,
"loss": 2.7484,
"step": 127000
},
{
"epoch": 36.94,
"learning_rate": 4.0766222479721903e-05,
"loss": 2.706,
"step": 127500
},
{
"epoch": 37.08,
"learning_rate": 4.073001158748552e-05,
"loss": 2.6221,
"step": 128000
},
{
"epoch": 37.22,
"learning_rate": 4.0693800695249136e-05,
"loss": 2.5343,
"step": 128500
},
{
"epoch": 37.37,
"learning_rate": 4.0657589803012745e-05,
"loss": 2.5271,
"step": 129000
},
{
"epoch": 37.51,
"learning_rate": 4.062137891077636e-05,
"loss": 2.6493,
"step": 129500
},
{
"epoch": 37.66,
"learning_rate": 4.058516801853998e-05,
"loss": 2.6699,
"step": 130000
},
{
"epoch": 37.8,
"learning_rate": 4.054895712630359e-05,
"loss": 2.6375,
"step": 130500
},
{
"epoch": 37.95,
"learning_rate": 4.051274623406721e-05,
"loss": 2.6659,
"step": 131000
},
{
"epoch": 38.09,
"learning_rate": 4.0476535341830825e-05,
"loss": 2.6404,
"step": 131500
},
{
"epoch": 38.24,
"learning_rate": 4.044032444959444e-05,
"loss": 2.4527,
"step": 132000
},
{
"epoch": 38.38,
"learning_rate": 4.040411355735805e-05,
"loss": 2.4812,
"step": 132500
},
{
"epoch": 38.53,
"learning_rate": 4.0367902665121674e-05,
"loss": 2.675,
"step": 133000
},
{
"epoch": 38.67,
"learning_rate": 4.033169177288529e-05,
"loss": 2.6116,
"step": 133500
},
{
"epoch": 38.82,
"learning_rate": 4.02954808806489e-05,
"loss": 2.5089,
"step": 134000
},
{
"epoch": 38.96,
"learning_rate": 4.0259269988412515e-05,
"loss": 2.6363,
"step": 134500
},
{
"epoch": 39.11,
"learning_rate": 4.022305909617613e-05,
"loss": 2.4868,
"step": 135000
},
{
"epoch": 39.25,
"learning_rate": 4.018684820393975e-05,
"loss": 2.4665,
"step": 135500
},
{
"epoch": 39.4,
"learning_rate": 4.015063731170336e-05,
"loss": 2.5217,
"step": 136000
},
{
"epoch": 39.54,
"learning_rate": 4.011442641946698e-05,
"loss": 2.5457,
"step": 136500
},
{
"epoch": 39.69,
"learning_rate": 4.0078215527230596e-05,
"loss": 2.5308,
"step": 137000
},
{
"epoch": 39.83,
"learning_rate": 4.0042004634994205e-05,
"loss": 2.5148,
"step": 137500
},
{
"epoch": 39.98,
"learning_rate": 4.000579374275782e-05,
"loss": 2.552,
"step": 138000
},
{
"epoch": 40.12,
"learning_rate": 3.996958285052144e-05,
"loss": 2.5229,
"step": 138500
},
{
"epoch": 40.27,
"learning_rate": 3.993337195828505e-05,
"loss": 2.3691,
"step": 139000
},
{
"epoch": 40.41,
"learning_rate": 3.989716106604867e-05,
"loss": 2.4165,
"step": 139500
},
{
"epoch": 40.56,
"learning_rate": 3.9860950173812286e-05,
"loss": 2.5134,
"step": 140000
},
{
"epoch": 40.7,
"learning_rate": 3.98247392815759e-05,
"loss": 2.4526,
"step": 140500
},
{
"epoch": 40.85,
"learning_rate": 3.978852838933951e-05,
"loss": 2.4467,
"step": 141000
},
{
"epoch": 40.99,
"learning_rate": 3.9752317497103134e-05,
"loss": 2.4586,
"step": 141500
},
{
"epoch": 41.14,
"learning_rate": 3.971610660486675e-05,
"loss": 2.2715,
"step": 142000
},
{
"epoch": 41.28,
"learning_rate": 3.967989571263036e-05,
"loss": 2.4122,
"step": 142500
},
{
"epoch": 41.43,
"learning_rate": 3.9643684820393975e-05,
"loss": 2.5038,
"step": 143000
},
{
"epoch": 41.57,
"learning_rate": 3.960747392815759e-05,
"loss": 2.4213,
"step": 143500
},
{
"epoch": 41.71,
"learning_rate": 3.957126303592121e-05,
"loss": 2.3776,
"step": 144000
},
{
"epoch": 41.86,
"learning_rate": 3.953505214368482e-05,
"loss": 2.4879,
"step": 144500
},
{
"epoch": 42.0,
"learning_rate": 3.949884125144844e-05,
"loss": 2.3793,
"step": 145000
},
{
"epoch": 42.15,
"learning_rate": 3.9462630359212056e-05,
"loss": 2.3577,
"step": 145500
},
{
"epoch": 42.29,
"learning_rate": 3.9426419466975665e-05,
"loss": 2.2899,
"step": 146000
},
{
"epoch": 42.44,
"learning_rate": 3.939020857473928e-05,
"loss": 2.2713,
"step": 146500
},
{
"epoch": 42.58,
"learning_rate": 3.93539976825029e-05,
"loss": 2.4175,
"step": 147000
},
{
"epoch": 42.73,
"learning_rate": 3.9317786790266513e-05,
"loss": 2.3225,
"step": 147500
},
{
"epoch": 42.87,
"learning_rate": 3.928157589803013e-05,
"loss": 2.4637,
"step": 148000
},
{
"epoch": 43.02,
"learning_rate": 3.9245365005793746e-05,
"loss": 2.4782,
"step": 148500
},
{
"epoch": 43.16,
"learning_rate": 3.920915411355736e-05,
"loss": 2.2441,
"step": 149000
},
{
"epoch": 43.31,
"learning_rate": 3.917294322132097e-05,
"loss": 2.3654,
"step": 149500
},
{
"epoch": 43.45,
"learning_rate": 3.9136732329084594e-05,
"loss": 2.3439,
"step": 150000
},
{
"epoch": 43.6,
"learning_rate": 3.91005214368482e-05,
"loss": 2.1964,
"step": 150500
},
{
"epoch": 43.74,
"learning_rate": 3.906431054461182e-05,
"loss": 2.3778,
"step": 151000
},
{
"epoch": 43.89,
"learning_rate": 3.9028099652375435e-05,
"loss": 2.3038,
"step": 151500
},
{
"epoch": 44.03,
"learning_rate": 3.899188876013905e-05,
"loss": 2.3877,
"step": 152000
},
{
"epoch": 44.18,
"learning_rate": 3.895567786790267e-05,
"loss": 2.2923,
"step": 152500
},
{
"epoch": 44.32,
"learning_rate": 3.891946697566628e-05,
"loss": 2.1665,
"step": 153000
},
{
"epoch": 44.47,
"learning_rate": 3.88832560834299e-05,
"loss": 2.2541,
"step": 153500
},
{
"epoch": 44.61,
"learning_rate": 3.8847045191193516e-05,
"loss": 2.3349,
"step": 154000
},
{
"epoch": 44.76,
"learning_rate": 3.8810834298957125e-05,
"loss": 2.1701,
"step": 154500
},
{
"epoch": 44.9,
"learning_rate": 3.877462340672075e-05,
"loss": 2.3005,
"step": 155000
},
{
"epoch": 45.05,
"learning_rate": 3.873841251448436e-05,
"loss": 2.2592,
"step": 155500
},
{
"epoch": 45.19,
"learning_rate": 3.8702201622247974e-05,
"loss": 2.1503,
"step": 156000
},
{
"epoch": 45.34,
"learning_rate": 3.866599073001159e-05,
"loss": 2.2068,
"step": 156500
},
{
"epoch": 45.48,
"learning_rate": 3.8629779837775206e-05,
"loss": 2.2581,
"step": 157000
},
{
"epoch": 45.63,
"learning_rate": 3.859356894553882e-05,
"loss": 2.207,
"step": 157500
},
{
"epoch": 45.77,
"learning_rate": 3.855735805330243e-05,
"loss": 2.2473,
"step": 158000
},
{
"epoch": 45.92,
"learning_rate": 3.8521147161066054e-05,
"loss": 2.3088,
"step": 158500
},
{
"epoch": 46.06,
"learning_rate": 3.848493626882966e-05,
"loss": 2.199,
"step": 159000
},
{
"epoch": 46.21,
"learning_rate": 3.844872537659328e-05,
"loss": 2.1469,
"step": 159500
},
{
"epoch": 46.35,
"learning_rate": 3.8412514484356895e-05,
"loss": 2.2277,
"step": 160000
},
{
"epoch": 46.49,
"learning_rate": 3.837630359212051e-05,
"loss": 2.0807,
"step": 160500
},
{
"epoch": 46.64,
"learning_rate": 3.834009269988413e-05,
"loss": 2.1421,
"step": 161000
},
{
"epoch": 46.78,
"learning_rate": 3.830388180764774e-05,
"loss": 2.0924,
"step": 161500
},
{
"epoch": 46.93,
"learning_rate": 3.826767091541136e-05,
"loss": 2.2633,
"step": 162000
},
{
"epoch": 47.07,
"learning_rate": 3.8231460023174976e-05,
"loss": 2.1603,
"step": 162500
},
{
"epoch": 47.22,
"learning_rate": 3.8195249130938585e-05,
"loss": 2.1038,
"step": 163000
},
{
"epoch": 47.36,
"learning_rate": 3.815903823870221e-05,
"loss": 2.081,
"step": 163500
},
{
"epoch": 47.51,
"learning_rate": 3.812282734646582e-05,
"loss": 2.0259,
"step": 164000
},
{
"epoch": 47.65,
"learning_rate": 3.8086616454229434e-05,
"loss": 2.0775,
"step": 164500
},
{
"epoch": 47.8,
"learning_rate": 3.805040556199305e-05,
"loss": 2.119,
"step": 165000
},
{
"epoch": 47.94,
"learning_rate": 3.8014194669756666e-05,
"loss": 2.0324,
"step": 165500
},
{
"epoch": 48.09,
"learning_rate": 3.797798377752028e-05,
"loss": 2.0463,
"step": 166000
},
{
"epoch": 48.23,
"learning_rate": 3.794177288528389e-05,
"loss": 1.9982,
"step": 166500
},
{
"epoch": 48.38,
"learning_rate": 3.7905561993047514e-05,
"loss": 2.1069,
"step": 167000
},
{
"epoch": 48.52,
"learning_rate": 3.786935110081112e-05,
"loss": 2.0855,
"step": 167500
},
{
"epoch": 48.67,
"learning_rate": 3.783314020857474e-05,
"loss": 2.0103,
"step": 168000
},
{
"epoch": 48.81,
"learning_rate": 3.7796929316338356e-05,
"loss": 2.1315,
"step": 168500
},
{
"epoch": 48.96,
"learning_rate": 3.776071842410197e-05,
"loss": 2.2163,
"step": 169000
},
{
"epoch": 49.1,
"learning_rate": 3.772450753186559e-05,
"loss": 2.0169,
"step": 169500
},
{
"epoch": 49.25,
"learning_rate": 3.76882966396292e-05,
"loss": 1.9853,
"step": 170000
},
{
"epoch": 49.39,
"learning_rate": 3.765208574739282e-05,
"loss": 2.0007,
"step": 170500
},
{
"epoch": 49.54,
"learning_rate": 3.7615874855156436e-05,
"loss": 1.9034,
"step": 171000
},
{
"epoch": 49.68,
"learning_rate": 3.7579663962920045e-05,
"loss": 2.0278,
"step": 171500
},
{
"epoch": 49.83,
"learning_rate": 3.754345307068367e-05,
"loss": 2.0554,
"step": 172000
},
{
"epoch": 49.97,
"learning_rate": 3.750724217844728e-05,
"loss": 2.0944,
"step": 172500
},
{
"epoch": 50.12,
"learning_rate": 3.7471031286210894e-05,
"loss": 1.9666,
"step": 173000
},
{
"epoch": 50.26,
"learning_rate": 3.743482039397451e-05,
"loss": 1.9237,
"step": 173500
},
{
"epoch": 50.41,
"learning_rate": 3.7398609501738126e-05,
"loss": 1.9957,
"step": 174000
},
{
"epoch": 50.55,
"learning_rate": 3.736239860950174e-05,
"loss": 1.9975,
"step": 174500
},
{
"epoch": 50.7,
"learning_rate": 3.732618771726535e-05,
"loss": 1.923,
"step": 175000
},
{
"epoch": 50.84,
"learning_rate": 3.7289976825028974e-05,
"loss": 2.0131,
"step": 175500
},
{
"epoch": 50.98,
"learning_rate": 3.7253765932792583e-05,
"loss": 2.0053,
"step": 176000
},
{
"epoch": 51.13,
"learning_rate": 3.72175550405562e-05,
"loss": 1.9155,
"step": 176500
},
{
"epoch": 51.27,
"learning_rate": 3.7181344148319816e-05,
"loss": 1.8302,
"step": 177000
},
{
"epoch": 51.42,
"learning_rate": 3.714513325608343e-05,
"loss": 1.8982,
"step": 177500
},
{
"epoch": 51.56,
"learning_rate": 3.710892236384705e-05,
"loss": 1.9291,
"step": 178000
},
{
"epoch": 51.71,
"learning_rate": 3.707271147161066e-05,
"loss": 1.9618,
"step": 178500
},
{
"epoch": 51.85,
"learning_rate": 3.703650057937428e-05,
"loss": 1.906,
"step": 179000
},
{
"epoch": 52.0,
"learning_rate": 3.700028968713789e-05,
"loss": 1.9577,
"step": 179500
},
{
"epoch": 52.14,
"learning_rate": 3.6964078794901505e-05,
"loss": 1.9181,
"step": 180000
},
{
"epoch": 52.29,
"learning_rate": 3.692786790266513e-05,
"loss": 1.8794,
"step": 180500
},
{
"epoch": 52.43,
"learning_rate": 3.689165701042874e-05,
"loss": 1.8926,
"step": 181000
},
{
"epoch": 52.58,
"learning_rate": 3.6855446118192354e-05,
"loss": 1.9498,
"step": 181500
},
{
"epoch": 52.72,
"learning_rate": 3.681923522595597e-05,
"loss": 1.8301,
"step": 182000
},
{
"epoch": 52.87,
"learning_rate": 3.6783024333719586e-05,
"loss": 1.8718,
"step": 182500
},
{
"epoch": 53.01,
"learning_rate": 3.67468134414832e-05,
"loss": 1.9002,
"step": 183000
},
{
"epoch": 53.16,
"learning_rate": 3.671060254924681e-05,
"loss": 1.7962,
"step": 183500
},
{
"epoch": 53.3,
"learning_rate": 3.6674391657010434e-05,
"loss": 1.7957,
"step": 184000
},
{
"epoch": 53.45,
"learning_rate": 3.6638180764774044e-05,
"loss": 1.8693,
"step": 184500
},
{
"epoch": 53.59,
"learning_rate": 3.660196987253766e-05,
"loss": 1.8591,
"step": 185000
},
{
"epoch": 53.74,
"learning_rate": 3.6565758980301276e-05,
"loss": 1.8654,
"step": 185500
},
{
"epoch": 53.88,
"learning_rate": 3.652954808806489e-05,
"loss": 1.8288,
"step": 186000
},
{
"epoch": 54.03,
"learning_rate": 3.649333719582851e-05,
"loss": 1.9124,
"step": 186500
},
{
"epoch": 54.17,
"learning_rate": 3.6457126303592124e-05,
"loss": 1.7429,
"step": 187000
},
{
"epoch": 54.32,
"learning_rate": 3.642091541135574e-05,
"loss": 1.8316,
"step": 187500
},
{
"epoch": 54.46,
"learning_rate": 3.638470451911935e-05,
"loss": 1.7558,
"step": 188000
},
{
"epoch": 54.61,
"learning_rate": 3.6348493626882966e-05,
"loss": 1.8358,
"step": 188500
},
{
"epoch": 54.75,
"learning_rate": 3.631228273464659e-05,
"loss": 1.7743,
"step": 189000
},
{
"epoch": 54.9,
"learning_rate": 3.62760718424102e-05,
"loss": 1.8545,
"step": 189500
},
{
"epoch": 55.04,
"learning_rate": 3.6239860950173814e-05,
"loss": 1.7785,
"step": 190000
},
{
"epoch": 55.19,
"learning_rate": 3.620365005793743e-05,
"loss": 1.764,
"step": 190500
},
{
"epoch": 55.33,
"learning_rate": 3.6167439165701046e-05,
"loss": 1.8189,
"step": 191000
},
{
"epoch": 55.48,
"learning_rate": 3.613122827346466e-05,
"loss": 1.6939,
"step": 191500
},
{
"epoch": 55.62,
"learning_rate": 3.609501738122827e-05,
"loss": 1.7771,
"step": 192000
},
{
"epoch": 55.76,
"learning_rate": 3.6058806488991894e-05,
"loss": 1.7576,
"step": 192500
},
{
"epoch": 55.91,
"learning_rate": 3.6022595596755504e-05,
"loss": 1.8118,
"step": 193000
},
{
"epoch": 56.05,
"learning_rate": 3.598638470451912e-05,
"loss": 1.7491,
"step": 193500
},
{
"epoch": 56.2,
"learning_rate": 3.5950173812282736e-05,
"loss": 1.7124,
"step": 194000
},
{
"epoch": 56.34,
"learning_rate": 3.591396292004635e-05,
"loss": 1.7616,
"step": 194500
},
{
"epoch": 56.49,
"learning_rate": 3.587775202780997e-05,
"loss": 1.7315,
"step": 195000
},
{
"epoch": 56.63,
"learning_rate": 3.5841541135573584e-05,
"loss": 1.6236,
"step": 195500
},
{
"epoch": 56.78,
"learning_rate": 3.58053302433372e-05,
"loss": 1.7692,
"step": 196000
},
{
"epoch": 56.92,
"learning_rate": 3.576911935110081e-05,
"loss": 1.6878,
"step": 196500
},
{
"epoch": 57.07,
"learning_rate": 3.5732908458864426e-05,
"loss": 1.6287,
"step": 197000
},
{
"epoch": 57.21,
"learning_rate": 3.569669756662805e-05,
"loss": 1.6295,
"step": 197500
},
{
"epoch": 57.36,
"learning_rate": 3.566048667439166e-05,
"loss": 1.712,
"step": 198000
},
{
"epoch": 57.5,
"learning_rate": 3.5624275782155274e-05,
"loss": 1.6799,
"step": 198500
},
{
"epoch": 57.65,
"learning_rate": 3.558806488991889e-05,
"loss": 1.7199,
"step": 199000
},
{
"epoch": 57.79,
"learning_rate": 3.5551853997682506e-05,
"loss": 1.6429,
"step": 199500
},
{
"epoch": 57.94,
"learning_rate": 3.551564310544612e-05,
"loss": 1.7739,
"step": 200000
},
{
"epoch": 58.08,
"learning_rate": 3.547943221320973e-05,
"loss": 1.5893,
"step": 200500
},
{
"epoch": 58.23,
"learning_rate": 3.5443221320973354e-05,
"loss": 1.7382,
"step": 201000
},
{
"epoch": 58.37,
"learning_rate": 3.5407010428736964e-05,
"loss": 1.587,
"step": 201500
},
{
"epoch": 58.52,
"learning_rate": 3.537079953650058e-05,
"loss": 1.6118,
"step": 202000
},
{
"epoch": 58.66,
"learning_rate": 3.5334588644264196e-05,
"loss": 1.5871,
"step": 202500
},
{
"epoch": 58.81,
"learning_rate": 3.529837775202781e-05,
"loss": 1.5877,
"step": 203000
},
{
"epoch": 58.95,
"learning_rate": 3.526216685979143e-05,
"loss": 1.6959,
"step": 203500
},
{
"epoch": 59.1,
"learning_rate": 3.5225955967555044e-05,
"loss": 1.6962,
"step": 204000
},
{
"epoch": 59.24,
"learning_rate": 3.518974507531866e-05,
"loss": 1.6245,
"step": 204500
},
{
"epoch": 59.39,
"learning_rate": 3.515353418308227e-05,
"loss": 1.6294,
"step": 205000
},
{
"epoch": 59.53,
"learning_rate": 3.5117323290845886e-05,
"loss": 1.6275,
"step": 205500
},
{
"epoch": 59.68,
"learning_rate": 3.508111239860951e-05,
"loss": 1.6681,
"step": 206000
},
{
"epoch": 59.82,
"learning_rate": 3.504490150637312e-05,
"loss": 1.5596,
"step": 206500
},
{
"epoch": 59.97,
"learning_rate": 3.5008690614136734e-05,
"loss": 1.629,
"step": 207000
},
{
"epoch": 60.11,
"learning_rate": 3.497247972190035e-05,
"loss": 1.6084,
"step": 207500
},
{
"epoch": 60.25,
"learning_rate": 3.4936268829663966e-05,
"loss": 1.5486,
"step": 208000
},
{
"epoch": 60.4,
"learning_rate": 3.4900057937427575e-05,
"loss": 1.5647,
"step": 208500
},
{
"epoch": 60.54,
"learning_rate": 3.486384704519119e-05,
"loss": 1.5691,
"step": 209000
},
{
"epoch": 60.69,
"learning_rate": 3.4827636152954814e-05,
"loss": 1.5658,
"step": 209500
},
{
"epoch": 60.83,
"learning_rate": 3.4791425260718424e-05,
"loss": 1.5257,
"step": 210000
},
{
"epoch": 60.98,
"learning_rate": 3.475521436848204e-05,
"loss": 1.5903,
"step": 210500
},
{
"epoch": 61.12,
"learning_rate": 3.4719003476245656e-05,
"loss": 1.514,
"step": 211000
},
{
"epoch": 61.27,
"learning_rate": 3.468279258400927e-05,
"loss": 1.4983,
"step": 211500
},
{
"epoch": 61.41,
"learning_rate": 3.464658169177289e-05,
"loss": 1.5336,
"step": 212000
},
{
"epoch": 61.56,
"learning_rate": 3.4610370799536504e-05,
"loss": 1.5524,
"step": 212500
},
{
"epoch": 61.7,
"learning_rate": 3.457415990730012e-05,
"loss": 1.5885,
"step": 213000
},
{
"epoch": 61.85,
"learning_rate": 3.453794901506373e-05,
"loss": 1.5389,
"step": 213500
},
{
"epoch": 61.99,
"learning_rate": 3.4501738122827346e-05,
"loss": 1.5126,
"step": 214000
},
{
"epoch": 62.14,
"learning_rate": 3.446552723059097e-05,
"loss": 1.4495,
"step": 214500
},
{
"epoch": 62.28,
"learning_rate": 3.442931633835458e-05,
"loss": 1.4506,
"step": 215000
},
{
"epoch": 62.43,
"learning_rate": 3.4393105446118194e-05,
"loss": 1.4459,
"step": 215500
},
{
"epoch": 62.57,
"learning_rate": 3.435689455388181e-05,
"loss": 1.5571,
"step": 216000
},
{
"epoch": 62.72,
"learning_rate": 3.4320683661645426e-05,
"loss": 1.5158,
"step": 216500
},
{
"epoch": 62.86,
"learning_rate": 3.4284472769409036e-05,
"loss": 1.5493,
"step": 217000
},
{
"epoch": 63.01,
"learning_rate": 3.424826187717265e-05,
"loss": 1.5083,
"step": 217500
},
{
"epoch": 63.15,
"learning_rate": 3.4212050984936275e-05,
"loss": 1.4297,
"step": 218000
},
{
"epoch": 63.3,
"learning_rate": 3.4175840092699884e-05,
"loss": 1.4847,
"step": 218500
},
{
"epoch": 63.44,
"learning_rate": 3.41396292004635e-05,
"loss": 1.446,
"step": 219000
},
{
"epoch": 63.59,
"learning_rate": 3.4103418308227116e-05,
"loss": 1.4292,
"step": 219500
},
{
"epoch": 63.73,
"learning_rate": 3.406720741599073e-05,
"loss": 1.5258,
"step": 220000
},
{
"epoch": 63.88,
"learning_rate": 3.403099652375435e-05,
"loss": 1.4968,
"step": 220500
},
{
"epoch": 64.02,
"learning_rate": 3.3994785631517964e-05,
"loss": 1.4877,
"step": 221000
},
{
"epoch": 64.17,
"learning_rate": 3.395857473928158e-05,
"loss": 1.4667,
"step": 221500
},
{
"epoch": 64.31,
"learning_rate": 3.392236384704519e-05,
"loss": 1.4413,
"step": 222000
},
{
"epoch": 64.46,
"learning_rate": 3.3886152954808806e-05,
"loss": 1.4228,
"step": 222500
},
{
"epoch": 64.6,
"learning_rate": 3.384994206257243e-05,
"loss": 1.4158,
"step": 223000
},
{
"epoch": 64.75,
"learning_rate": 3.381373117033604e-05,
"loss": 1.376,
"step": 223500
},
{
"epoch": 64.89,
"learning_rate": 3.3777520278099654e-05,
"loss": 1.4359,
"step": 224000
},
{
"epoch": 65.03,
"learning_rate": 3.374130938586327e-05,
"loss": 1.4994,
"step": 224500
},
{
"epoch": 65.18,
"learning_rate": 3.3705098493626886e-05,
"loss": 1.3646,
"step": 225000
},
{
"epoch": 65.32,
"learning_rate": 3.3668887601390496e-05,
"loss": 1.5025,
"step": 225500
},
{
"epoch": 65.47,
"learning_rate": 3.363267670915411e-05,
"loss": 1.4077,
"step": 226000
},
{
"epoch": 65.61,
"learning_rate": 3.3596465816917735e-05,
"loss": 1.3323,
"step": 226500
},
{
"epoch": 65.76,
"learning_rate": 3.3560254924681344e-05,
"loss": 1.4387,
"step": 227000
},
{
"epoch": 65.9,
"learning_rate": 3.352404403244496e-05,
"loss": 1.3418,
"step": 227500
},
{
"epoch": 66.05,
"learning_rate": 3.3487833140208576e-05,
"loss": 1.2793,
"step": 228000
},
{
"epoch": 66.19,
"learning_rate": 3.345162224797219e-05,
"loss": 1.3484,
"step": 228500
},
{
"epoch": 66.34,
"learning_rate": 3.341541135573581e-05,
"loss": 1.3419,
"step": 229000
},
{
"epoch": 66.48,
"learning_rate": 3.3379200463499424e-05,
"loss": 1.308,
"step": 229500
},
{
"epoch": 66.63,
"learning_rate": 3.334298957126304e-05,
"loss": 1.3193,
"step": 230000
},
{
"epoch": 66.77,
"learning_rate": 3.330677867902665e-05,
"loss": 1.4064,
"step": 230500
},
{
"epoch": 66.92,
"learning_rate": 3.3270567786790266e-05,
"loss": 1.3766,
"step": 231000
},
{
"epoch": 67.06,
"learning_rate": 3.323435689455388e-05,
"loss": 1.3482,
"step": 231500
},
{
"epoch": 67.21,
"learning_rate": 3.31981460023175e-05,
"loss": 1.348,
"step": 232000
},
{
"epoch": 67.35,
"learning_rate": 3.3161935110081114e-05,
"loss": 1.2824,
"step": 232500
},
{
"epoch": 67.5,
"learning_rate": 3.312572421784473e-05,
"loss": 1.2908,
"step": 233000
},
{
"epoch": 67.64,
"learning_rate": 3.3089513325608346e-05,
"loss": 1.4077,
"step": 233500
},
{
"epoch": 67.79,
"learning_rate": 3.3053302433371956e-05,
"loss": 1.3023,
"step": 234000
},
{
"epoch": 67.93,
"learning_rate": 3.301709154113558e-05,
"loss": 1.422,
"step": 234500
},
{
"epoch": 68.08,
"learning_rate": 3.2980880648899195e-05,
"loss": 1.3004,
"step": 235000
},
{
"epoch": 68.22,
"learning_rate": 3.2944669756662804e-05,
"loss": 1.2346,
"step": 235500
},
{
"epoch": 68.37,
"learning_rate": 3.290845886442642e-05,
"loss": 1.2779,
"step": 236000
},
{
"epoch": 68.51,
"learning_rate": 3.2872247972190036e-05,
"loss": 1.2945,
"step": 236500
},
{
"epoch": 68.66,
"learning_rate": 3.283603707995365e-05,
"loss": 1.2413,
"step": 237000
},
{
"epoch": 68.8,
"learning_rate": 3.279982618771727e-05,
"loss": 1.3038,
"step": 237500
},
{
"epoch": 68.95,
"learning_rate": 3.2763615295480884e-05,
"loss": 1.3092,
"step": 238000
},
{
"epoch": 69.09,
"learning_rate": 3.27274044032445e-05,
"loss": 1.2456,
"step": 238500
},
{
"epoch": 69.24,
"learning_rate": 3.269119351100811e-05,
"loss": 1.2915,
"step": 239000
},
{
"epoch": 69.38,
"learning_rate": 3.2654982618771726e-05,
"loss": 1.2733,
"step": 239500
},
{
"epoch": 69.52,
"learning_rate": 3.261877172653534e-05,
"loss": 1.2595,
"step": 240000
},
{
"epoch": 69.67,
"learning_rate": 3.258256083429896e-05,
"loss": 1.2344,
"step": 240500
},
{
"epoch": 69.81,
"learning_rate": 3.2546349942062574e-05,
"loss": 1.2483,
"step": 241000
},
{
"epoch": 69.96,
"learning_rate": 3.251013904982619e-05,
"loss": 1.3016,
"step": 241500
},
{
"epoch": 70.1,
"learning_rate": 3.2473928157589806e-05,
"loss": 1.1828,
"step": 242000
},
{
"epoch": 70.25,
"learning_rate": 3.2437717265353416e-05,
"loss": 1.2399,
"step": 242500
},
{
"epoch": 70.39,
"learning_rate": 3.240150637311704e-05,
"loss": 1.2375,
"step": 243000
},
{
"epoch": 70.54,
"learning_rate": 3.2365295480880655e-05,
"loss": 1.2156,
"step": 243500
},
{
"epoch": 70.68,
"learning_rate": 3.2329084588644264e-05,
"loss": 1.2149,
"step": 244000
},
{
"epoch": 70.83,
"learning_rate": 3.229287369640788e-05,
"loss": 1.2616,
"step": 244500
},
{
"epoch": 70.97,
"learning_rate": 3.2256662804171496e-05,
"loss": 1.2711,
"step": 245000
},
{
"epoch": 71.12,
"learning_rate": 3.222045191193511e-05,
"loss": 1.1306,
"step": 245500
},
{
"epoch": 71.26,
"learning_rate": 3.218424101969872e-05,
"loss": 1.1036,
"step": 246000
},
{
"epoch": 71.41,
"learning_rate": 3.2148030127462345e-05,
"loss": 1.1606,
"step": 246500
},
{
"epoch": 71.55,
"learning_rate": 3.211181923522596e-05,
"loss": 1.2608,
"step": 247000
},
{
"epoch": 71.7,
"learning_rate": 3.207560834298957e-05,
"loss": 1.2569,
"step": 247500
},
{
"epoch": 71.84,
"learning_rate": 3.2039397450753186e-05,
"loss": 1.2382,
"step": 248000
},
{
"epoch": 71.99,
"learning_rate": 3.20031865585168e-05,
"loss": 1.1877,
"step": 248500
},
{
"epoch": 72.13,
"learning_rate": 3.196697566628042e-05,
"loss": 1.2093,
"step": 249000
},
{
"epoch": 72.28,
"learning_rate": 3.1930764774044034e-05,
"loss": 1.133,
"step": 249500
},
{
"epoch": 72.42,
"learning_rate": 3.189455388180765e-05,
"loss": 1.1242,
"step": 250000
},
{
"epoch": 72.57,
"learning_rate": 3.1858342989571267e-05,
"loss": 1.1529,
"step": 250500
},
{
"epoch": 72.71,
"learning_rate": 3.1822132097334876e-05,
"loss": 1.1561,
"step": 251000
},
{
"epoch": 72.86,
"learning_rate": 3.17859212050985e-05,
"loss": 1.1941,
"step": 251500
},
{
"epoch": 73.0,
"learning_rate": 3.1749710312862115e-05,
"loss": 1.2591,
"step": 252000
},
{
"epoch": 73.15,
"learning_rate": 3.1713499420625724e-05,
"loss": 1.0791,
"step": 252500
},
{
"epoch": 73.29,
"learning_rate": 3.167728852838934e-05,
"loss": 1.1471,
"step": 253000
},
{
"epoch": 73.44,
"learning_rate": 3.1641077636152956e-05,
"loss": 1.0509,
"step": 253500
},
{
"epoch": 73.58,
"learning_rate": 3.160486674391657e-05,
"loss": 1.1248,
"step": 254000
},
{
"epoch": 73.73,
"learning_rate": 3.156865585168018e-05,
"loss": 1.1184,
"step": 254500
},
{
"epoch": 73.87,
"learning_rate": 3.1532444959443805e-05,
"loss": 1.2078,
"step": 255000
},
{
"epoch": 74.02,
"learning_rate": 3.149623406720742e-05,
"loss": 1.1616,
"step": 255500
},
{
"epoch": 74.16,
"learning_rate": 3.146002317497103e-05,
"loss": 1.0971,
"step": 256000
},
{
"epoch": 74.3,
"learning_rate": 3.1423812282734646e-05,
"loss": 1.1006,
"step": 256500
},
{
"epoch": 74.45,
"learning_rate": 3.138760139049826e-05,
"loss": 1.1215,
"step": 257000
},
{
"epoch": 74.59,
"learning_rate": 3.135139049826188e-05,
"loss": 1.1527,
"step": 257500
},
{
"epoch": 74.74,
"learning_rate": 3.1315179606025494e-05,
"loss": 1.1213,
"step": 258000
},
{
"epoch": 74.88,
"learning_rate": 3.127896871378911e-05,
"loss": 1.1626,
"step": 258500
},
{
"epoch": 75.03,
"learning_rate": 3.1242757821552727e-05,
"loss": 1.0862,
"step": 259000
},
{
"epoch": 75.17,
"learning_rate": 3.1206546929316336e-05,
"loss": 1.0992,
"step": 259500
},
{
"epoch": 75.32,
"learning_rate": 3.117033603707996e-05,
"loss": 1.0891,
"step": 260000
},
{
"epoch": 75.46,
"learning_rate": 3.113412514484357e-05,
"loss": 1.0892,
"step": 260500
},
{
"epoch": 75.61,
"learning_rate": 3.1097914252607184e-05,
"loss": 1.0949,
"step": 261000
},
{
"epoch": 75.75,
"learning_rate": 3.10617033603708e-05,
"loss": 1.0752,
"step": 261500
},
{
"epoch": 75.9,
"learning_rate": 3.1025492468134416e-05,
"loss": 1.1233,
"step": 262000
},
{
"epoch": 76.04,
"learning_rate": 3.098928157589803e-05,
"loss": 1.1272,
"step": 262500
},
{
"epoch": 76.19,
"learning_rate": 3.095307068366164e-05,
"loss": 1.064,
"step": 263000
},
{
"epoch": 76.33,
"learning_rate": 3.0916859791425265e-05,
"loss": 1.0854,
"step": 263500
},
{
"epoch": 76.48,
"learning_rate": 3.088064889918888e-05,
"loss": 1.0471,
"step": 264000
},
{
"epoch": 76.62,
"learning_rate": 3.084443800695249e-05,
"loss": 1.0993,
"step": 264500
},
{
"epoch": 76.77,
"learning_rate": 3.0808227114716106e-05,
"loss": 1.1075,
"step": 265000
},
{
"epoch": 76.91,
"learning_rate": 3.077201622247972e-05,
"loss": 1.0724,
"step": 265500
},
{
"epoch": 77.06,
"learning_rate": 3.073580533024334e-05,
"loss": 1.0793,
"step": 266000
},
{
"epoch": 77.2,
"learning_rate": 3.0699594438006954e-05,
"loss": 1.0535,
"step": 266500
},
{
"epoch": 77.35,
"learning_rate": 3.066338354577057e-05,
"loss": 0.9771,
"step": 267000
},
{
"epoch": 77.49,
"learning_rate": 3.062717265353419e-05,
"loss": 1.0115,
"step": 267500
},
{
"epoch": 77.64,
"learning_rate": 3.0590961761297796e-05,
"loss": 1.0827,
"step": 268000
},
{
"epoch": 77.78,
"learning_rate": 3.055475086906142e-05,
"loss": 1.0376,
"step": 268500
},
{
"epoch": 77.93,
"learning_rate": 3.051853997682503e-05,
"loss": 1.0339,
"step": 269000
},
{
"epoch": 78.07,
"learning_rate": 3.0482329084588644e-05,
"loss": 1.0429,
"step": 269500
},
{
"epoch": 78.22,
"learning_rate": 3.0446118192352264e-05,
"loss": 1.0557,
"step": 270000
},
{
"epoch": 78.36,
"learning_rate": 3.0409907300115876e-05,
"loss": 0.9607,
"step": 270500
},
{
"epoch": 78.51,
"learning_rate": 3.0373696407879493e-05,
"loss": 1.0016,
"step": 271000
},
{
"epoch": 78.65,
"learning_rate": 3.0337485515643105e-05,
"loss": 1.0083,
"step": 271500
},
{
"epoch": 78.79,
"learning_rate": 3.030127462340672e-05,
"loss": 1.0785,
"step": 272000
},
{
"epoch": 78.94,
"learning_rate": 3.026506373117034e-05,
"loss": 1.0867,
"step": 272500
},
{
"epoch": 79.08,
"learning_rate": 3.0228852838933954e-05,
"loss": 0.9776,
"step": 273000
},
{
"epoch": 79.23,
"learning_rate": 3.019264194669757e-05,
"loss": 0.9559,
"step": 273500
},
{
"epoch": 79.37,
"learning_rate": 3.0156431054461182e-05,
"loss": 0.9912,
"step": 274000
},
{
"epoch": 79.52,
"learning_rate": 3.01202201622248e-05,
"loss": 1.0311,
"step": 274500
},
{
"epoch": 79.66,
"learning_rate": 3.008400926998841e-05,
"loss": 1.0281,
"step": 275000
},
{
"epoch": 79.81,
"learning_rate": 3.0047798377752027e-05,
"loss": 0.9119,
"step": 275500
},
{
"epoch": 79.95,
"learning_rate": 3.0011587485515647e-05,
"loss": 1.0614,
"step": 276000
},
{
"epoch": 80.1,
"learning_rate": 2.997537659327926e-05,
"loss": 1.0004,
"step": 276500
},
{
"epoch": 80.24,
"learning_rate": 2.9939165701042876e-05,
"loss": 0.9568,
"step": 277000
},
{
"epoch": 80.39,
"learning_rate": 2.9902954808806488e-05,
"loss": 0.9485,
"step": 277500
},
{
"epoch": 80.53,
"learning_rate": 2.9866743916570104e-05,
"loss": 0.9932,
"step": 278000
},
{
"epoch": 80.68,
"learning_rate": 2.9830533024333724e-05,
"loss": 0.9095,
"step": 278500
},
{
"epoch": 80.82,
"learning_rate": 2.9794322132097337e-05,
"loss": 0.929,
"step": 279000
},
{
"epoch": 80.97,
"learning_rate": 2.9758111239860953e-05,
"loss": 0.9774,
"step": 279500
},
{
"epoch": 81.11,
"learning_rate": 2.9721900347624565e-05,
"loss": 0.9507,
"step": 280000
},
{
"epoch": 81.26,
"learning_rate": 2.968568945538818e-05,
"loss": 0.9107,
"step": 280500
},
{
"epoch": 81.4,
"learning_rate": 2.96494785631518e-05,
"loss": 0.9239,
"step": 281000
},
{
"epoch": 81.55,
"learning_rate": 2.9613267670915414e-05,
"loss": 0.9795,
"step": 281500
},
{
"epoch": 81.69,
"learning_rate": 2.957705677867903e-05,
"loss": 0.9762,
"step": 282000
},
{
"epoch": 81.84,
"learning_rate": 2.9540845886442642e-05,
"loss": 0.9214,
"step": 282500
},
{
"epoch": 81.98,
"learning_rate": 2.950463499420626e-05,
"loss": 1.0133,
"step": 283000
},
{
"epoch": 82.13,
"learning_rate": 2.946842410196987e-05,
"loss": 0.9045,
"step": 283500
},
{
"epoch": 82.27,
"learning_rate": 2.943221320973349e-05,
"loss": 0.91,
"step": 284000
},
{
"epoch": 82.42,
"learning_rate": 2.9396002317497107e-05,
"loss": 0.9366,
"step": 284500
},
{
"epoch": 82.56,
"learning_rate": 2.935979142526072e-05,
"loss": 0.8921,
"step": 285000
},
{
"epoch": 82.71,
"learning_rate": 2.9323580533024336e-05,
"loss": 0.9532,
"step": 285500
},
{
"epoch": 82.85,
"learning_rate": 2.928736964078795e-05,
"loss": 0.9885,
"step": 286000
},
{
"epoch": 83.0,
"learning_rate": 2.9251158748551564e-05,
"loss": 0.953,
"step": 286500
},
{
"epoch": 83.14,
"learning_rate": 2.9214947856315184e-05,
"loss": 0.8898,
"step": 287000
},
{
"epoch": 83.29,
"learning_rate": 2.9178736964078797e-05,
"loss": 0.8683,
"step": 287500
},
{
"epoch": 83.43,
"learning_rate": 2.9142526071842413e-05,
"loss": 0.8697,
"step": 288000
},
{
"epoch": 83.57,
"learning_rate": 2.9106315179606025e-05,
"loss": 0.9246,
"step": 288500
},
{
"epoch": 83.72,
"learning_rate": 2.907010428736964e-05,
"loss": 0.904,
"step": 289000
},
{
"epoch": 83.86,
"learning_rate": 2.9033893395133254e-05,
"loss": 0.8879,
"step": 289500
},
{
"epoch": 84.01,
"learning_rate": 2.8997682502896874e-05,
"loss": 0.9338,
"step": 290000
},
{
"epoch": 84.15,
"learning_rate": 2.896147161066049e-05,
"loss": 0.8704,
"step": 290500
},
{
"epoch": 84.3,
"learning_rate": 2.8925260718424102e-05,
"loss": 0.8463,
"step": 291000
},
{
"epoch": 84.44,
"learning_rate": 2.888904982618772e-05,
"loss": 0.8428,
"step": 291500
},
{
"epoch": 84.59,
"learning_rate": 2.885283893395133e-05,
"loss": 0.9357,
"step": 292000
},
{
"epoch": 84.73,
"learning_rate": 2.881662804171495e-05,
"loss": 0.8905,
"step": 292500
},
{
"epoch": 84.88,
"learning_rate": 2.8780417149478567e-05,
"loss": 0.9016,
"step": 293000
},
{
"epoch": 85.02,
"learning_rate": 2.874420625724218e-05,
"loss": 0.9352,
"step": 293500
},
{
"epoch": 85.17,
"learning_rate": 2.8707995365005796e-05,
"loss": 0.8342,
"step": 294000
},
{
"epoch": 85.31,
"learning_rate": 2.867178447276941e-05,
"loss": 0.8075,
"step": 294500
},
{
"epoch": 85.46,
"learning_rate": 2.8635573580533024e-05,
"loss": 0.8817,
"step": 295000
},
{
"epoch": 85.6,
"learning_rate": 2.8599362688296644e-05,
"loss": 0.8684,
"step": 295500
},
{
"epoch": 85.75,
"learning_rate": 2.8563151796060257e-05,
"loss": 0.78,
"step": 296000
},
{
"epoch": 85.89,
"learning_rate": 2.8526940903823873e-05,
"loss": 0.9032,
"step": 296500
},
{
"epoch": 86.04,
"learning_rate": 2.8490730011587485e-05,
"loss": 0.8435,
"step": 297000
},
{
"epoch": 86.18,
"learning_rate": 2.84545191193511e-05,
"loss": 0.8357,
"step": 297500
},
{
"epoch": 86.33,
"learning_rate": 2.8418308227114714e-05,
"loss": 0.8398,
"step": 298000
},
{
"epoch": 86.47,
"learning_rate": 2.8382097334878334e-05,
"loss": 0.8226,
"step": 298500
},
{
"epoch": 86.62,
"learning_rate": 2.834588644264195e-05,
"loss": 0.8855,
"step": 299000
},
{
"epoch": 86.76,
"learning_rate": 2.8309675550405563e-05,
"loss": 0.8002,
"step": 299500
},
{
"epoch": 86.91,
"learning_rate": 2.827346465816918e-05,
"loss": 0.8452,
"step": 300000
},
{
"epoch": 87.05,
"learning_rate": 2.823725376593279e-05,
"loss": 0.917,
"step": 300500
},
{
"epoch": 87.2,
"learning_rate": 2.820104287369641e-05,
"loss": 0.8106,
"step": 301000
},
{
"epoch": 87.34,
"learning_rate": 2.8164831981460027e-05,
"loss": 0.8121,
"step": 301500
},
{
"epoch": 87.49,
"learning_rate": 2.812862108922364e-05,
"loss": 0.8031,
"step": 302000
},
{
"epoch": 87.63,
"learning_rate": 2.8092410196987256e-05,
"loss": 0.7967,
"step": 302500
},
{
"epoch": 87.78,
"learning_rate": 2.805619930475087e-05,
"loss": 0.8531,
"step": 303000
},
{
"epoch": 87.92,
"learning_rate": 2.8019988412514488e-05,
"loss": 0.8355,
"step": 303500
},
{
"epoch": 88.06,
"learning_rate": 2.7983777520278097e-05,
"loss": 0.8451,
"step": 304000
},
{
"epoch": 88.21,
"learning_rate": 2.7947566628041717e-05,
"loss": 0.787,
"step": 304500
},
{
"epoch": 88.35,
"learning_rate": 2.7911355735805333e-05,
"loss": 0.8058,
"step": 305000
},
{
"epoch": 88.5,
"learning_rate": 2.7875144843568946e-05,
"loss": 0.802,
"step": 305500
},
{
"epoch": 88.64,
"learning_rate": 2.783893395133256e-05,
"loss": 0.8519,
"step": 306000
},
{
"epoch": 88.79,
"learning_rate": 2.7802723059096174e-05,
"loss": 0.7892,
"step": 306500
},
{
"epoch": 88.93,
"learning_rate": 2.7766512166859794e-05,
"loss": 0.7878,
"step": 307000
},
{
"epoch": 89.08,
"learning_rate": 2.773030127462341e-05,
"loss": 0.7799,
"step": 307500
},
{
"epoch": 89.22,
"learning_rate": 2.7694090382387023e-05,
"loss": 0.7811,
"step": 308000
},
{
"epoch": 89.37,
"learning_rate": 2.765787949015064e-05,
"loss": 0.7494,
"step": 308500
},
{
"epoch": 89.51,
"learning_rate": 2.762166859791425e-05,
"loss": 0.7852,
"step": 309000
},
{
"epoch": 89.66,
"learning_rate": 2.758545770567787e-05,
"loss": 0.8175,
"step": 309500
},
{
"epoch": 89.8,
"learning_rate": 2.7549246813441487e-05,
"loss": 0.7376,
"step": 310000
},
{
"epoch": 89.95,
"learning_rate": 2.75130359212051e-05,
"loss": 0.7756,
"step": 310500
},
{
"epoch": 90.09,
"learning_rate": 2.7476825028968716e-05,
"loss": 0.7502,
"step": 311000
},
{
"epoch": 90.24,
"learning_rate": 2.744061413673233e-05,
"loss": 0.715,
"step": 311500
},
{
"epoch": 90.38,
"learning_rate": 2.7404403244495948e-05,
"loss": 0.7592,
"step": 312000
},
{
"epoch": 90.53,
"learning_rate": 2.7368192352259557e-05,
"loss": 0.756,
"step": 312500
},
{
"epoch": 90.67,
"learning_rate": 2.7331981460023177e-05,
"loss": 0.8058,
"step": 313000
},
{
"epoch": 90.82,
"learning_rate": 2.7295770567786793e-05,
"loss": 0.7458,
"step": 313500
},
{
"epoch": 90.96,
"learning_rate": 2.7259559675550406e-05,
"loss": 0.7984,
"step": 314000
},
{
"epoch": 91.11,
"learning_rate": 2.7223348783314022e-05,
"loss": 0.7359,
"step": 314500
},
{
"epoch": 91.25,
"learning_rate": 2.7187137891077634e-05,
"loss": 0.7224,
"step": 315000
},
{
"epoch": 91.4,
"learning_rate": 2.7150926998841254e-05,
"loss": 0.7386,
"step": 315500
},
{
"epoch": 91.54,
"learning_rate": 2.711471610660487e-05,
"loss": 0.7468,
"step": 316000
},
{
"epoch": 91.69,
"learning_rate": 2.7078505214368483e-05,
"loss": 0.7356,
"step": 316500
},
{
"epoch": 91.83,
"learning_rate": 2.70422943221321e-05,
"loss": 0.7545,
"step": 317000
},
{
"epoch": 91.98,
"learning_rate": 2.700608342989571e-05,
"loss": 0.7898,
"step": 317500
},
{
"epoch": 92.12,
"learning_rate": 2.696987253765933e-05,
"loss": 0.7097,
"step": 318000
},
{
"epoch": 92.27,
"learning_rate": 2.693366164542294e-05,
"loss": 0.7192,
"step": 318500
},
{
"epoch": 92.41,
"learning_rate": 2.689745075318656e-05,
"loss": 0.7318,
"step": 319000
},
{
"epoch": 92.56,
"learning_rate": 2.6861239860950176e-05,
"loss": 0.6905,
"step": 319500
},
{
"epoch": 92.7,
"learning_rate": 2.682502896871379e-05,
"loss": 0.7404,
"step": 320000
},
{
"epoch": 92.84,
"learning_rate": 2.6788818076477408e-05,
"loss": 0.7495,
"step": 320500
},
{
"epoch": 92.99,
"learning_rate": 2.6752607184241017e-05,
"loss": 0.7459,
"step": 321000
},
{
"epoch": 93.13,
"learning_rate": 2.6716396292004637e-05,
"loss": 0.7149,
"step": 321500
},
{
"epoch": 93.28,
"learning_rate": 2.6680185399768253e-05,
"loss": 0.6852,
"step": 322000
},
{
"epoch": 93.42,
"learning_rate": 2.6643974507531866e-05,
"loss": 0.6797,
"step": 322500
},
{
"epoch": 93.57,
"learning_rate": 2.6607763615295482e-05,
"loss": 0.6807,
"step": 323000
},
{
"epoch": 93.71,
"learning_rate": 2.6571552723059094e-05,
"loss": 0.7131,
"step": 323500
},
{
"epoch": 93.86,
"learning_rate": 2.6535341830822714e-05,
"loss": 0.7032,
"step": 324000
},
{
"epoch": 94.0,
"learning_rate": 2.649913093858633e-05,
"loss": 0.7271,
"step": 324500
},
{
"epoch": 94.15,
"learning_rate": 2.6462920046349943e-05,
"loss": 0.6575,
"step": 325000
},
{
"epoch": 94.29,
"learning_rate": 2.642670915411356e-05,
"loss": 0.7185,
"step": 325500
},
{
"epoch": 94.44,
"learning_rate": 2.639049826187717e-05,
"loss": 0.7487,
"step": 326000
},
{
"epoch": 94.58,
"learning_rate": 2.635428736964079e-05,
"loss": 0.7181,
"step": 326500
},
{
"epoch": 94.73,
"learning_rate": 2.63180764774044e-05,
"loss": 0.6834,
"step": 327000
},
{
"epoch": 94.87,
"learning_rate": 2.628186558516802e-05,
"loss": 0.7236,
"step": 327500
},
{
"epoch": 95.02,
"learning_rate": 2.6245654692931636e-05,
"loss": 0.6998,
"step": 328000
},
{
"epoch": 95.16,
"learning_rate": 2.620944380069525e-05,
"loss": 0.6912,
"step": 328500
},
{
"epoch": 95.31,
"learning_rate": 2.6173232908458868e-05,
"loss": 0.6545,
"step": 329000
},
{
"epoch": 95.45,
"learning_rate": 2.6137022016222477e-05,
"loss": 0.6852,
"step": 329500
},
{
"epoch": 95.6,
"learning_rate": 2.6100811123986097e-05,
"loss": 0.6328,
"step": 330000
},
{
"epoch": 95.74,
"learning_rate": 2.6064600231749713e-05,
"loss": 0.6718,
"step": 330500
},
{
"epoch": 95.89,
"learning_rate": 2.6028389339513326e-05,
"loss": 0.6903,
"step": 331000
},
{
"epoch": 96.03,
"learning_rate": 2.5992178447276945e-05,
"loss": 0.663,
"step": 331500
},
{
"epoch": 96.18,
"learning_rate": 2.5955967555040555e-05,
"loss": 0.6634,
"step": 332000
},
{
"epoch": 96.32,
"learning_rate": 2.5919756662804174e-05,
"loss": 0.6536,
"step": 332500
},
{
"epoch": 96.47,
"learning_rate": 2.5883545770567787e-05,
"loss": 0.6705,
"step": 333000
},
{
"epoch": 96.61,
"learning_rate": 2.5847334878331403e-05,
"loss": 0.6472,
"step": 333500
},
{
"epoch": 96.76,
"learning_rate": 2.581112398609502e-05,
"loss": 0.6946,
"step": 334000
},
{
"epoch": 96.9,
"learning_rate": 2.577491309385863e-05,
"loss": 0.6203,
"step": 334500
},
{
"epoch": 97.05,
"learning_rate": 2.573870220162225e-05,
"loss": 0.6359,
"step": 335000
},
{
"epoch": 97.19,
"learning_rate": 2.570249130938586e-05,
"loss": 0.6363,
"step": 335500
},
{
"epoch": 97.33,
"learning_rate": 2.566628041714948e-05,
"loss": 0.6343,
"step": 336000
},
{
"epoch": 97.48,
"learning_rate": 2.5630069524913096e-05,
"loss": 0.6251,
"step": 336500
},
{
"epoch": 97.62,
"learning_rate": 2.559385863267671e-05,
"loss": 0.6324,
"step": 337000
},
{
"epoch": 97.77,
"learning_rate": 2.5557647740440328e-05,
"loss": 0.6567,
"step": 337500
},
{
"epoch": 97.91,
"learning_rate": 2.5521436848203938e-05,
"loss": 0.6568,
"step": 338000
},
{
"epoch": 98.06,
"learning_rate": 2.5485225955967557e-05,
"loss": 0.6158,
"step": 338500
},
{
"epoch": 98.2,
"learning_rate": 2.5449015063731173e-05,
"loss": 0.5807,
"step": 339000
},
{
"epoch": 98.35,
"learning_rate": 2.5412804171494786e-05,
"loss": 0.6511,
"step": 339500
},
{
"epoch": 98.49,
"learning_rate": 2.5376593279258405e-05,
"loss": 0.6278,
"step": 340000
},
{
"epoch": 98.64,
"learning_rate": 2.5340382387022015e-05,
"loss": 0.6598,
"step": 340500
},
{
"epoch": 98.78,
"learning_rate": 2.5304171494785634e-05,
"loss": 0.6021,
"step": 341000
},
{
"epoch": 98.93,
"learning_rate": 2.5267960602549247e-05,
"loss": 0.6365,
"step": 341500
},
{
"epoch": 99.07,
"learning_rate": 2.5231749710312863e-05,
"loss": 0.6539,
"step": 342000
},
{
"epoch": 99.22,
"learning_rate": 2.519553881807648e-05,
"loss": 0.5917,
"step": 342500
},
{
"epoch": 99.36,
"learning_rate": 2.5159327925840092e-05,
"loss": 0.6234,
"step": 343000
},
{
"epoch": 99.51,
"learning_rate": 2.512311703360371e-05,
"loss": 0.6112,
"step": 343500
},
{
"epoch": 99.65,
"learning_rate": 2.5086906141367324e-05,
"loss": 0.6155,
"step": 344000
},
{
"epoch": 99.8,
"learning_rate": 2.505069524913094e-05,
"loss": 0.5829,
"step": 344500
},
{
"epoch": 99.94,
"learning_rate": 2.5014484356894556e-05,
"loss": 0.6073,
"step": 345000
},
{
"epoch": 100.09,
"learning_rate": 2.497827346465817e-05,
"loss": 0.5886,
"step": 345500
},
{
"epoch": 100.23,
"learning_rate": 2.4942062572421785e-05,
"loss": 0.5803,
"step": 346000
},
{
"epoch": 100.38,
"learning_rate": 2.49058516801854e-05,
"loss": 0.5704,
"step": 346500
},
{
"epoch": 100.52,
"learning_rate": 2.4869640787949017e-05,
"loss": 0.5902,
"step": 347000
},
{
"epoch": 100.67,
"learning_rate": 2.4833429895712633e-05,
"loss": 0.5799,
"step": 347500
},
{
"epoch": 100.81,
"learning_rate": 2.4797219003476246e-05,
"loss": 0.5898,
"step": 348000
},
{
"epoch": 100.96,
"learning_rate": 2.4761008111239862e-05,
"loss": 0.6129,
"step": 348500
},
{
"epoch": 101.1,
"learning_rate": 2.4724797219003478e-05,
"loss": 0.6226,
"step": 349000
},
{
"epoch": 101.25,
"learning_rate": 2.4688586326767094e-05,
"loss": 0.6064,
"step": 349500
},
{
"epoch": 101.39,
"learning_rate": 2.4652375434530707e-05,
"loss": 0.5727,
"step": 350000
},
{
"epoch": 101.54,
"learning_rate": 2.4616164542294323e-05,
"loss": 0.5478,
"step": 350500
},
{
"epoch": 101.68,
"learning_rate": 2.457995365005794e-05,
"loss": 0.5586,
"step": 351000
},
{
"epoch": 101.83,
"learning_rate": 2.4543742757821552e-05,
"loss": 0.5872,
"step": 351500
},
{
"epoch": 101.97,
"learning_rate": 2.450753186558517e-05,
"loss": 0.6057,
"step": 352000
},
{
"epoch": 102.11,
"learning_rate": 2.4471320973348784e-05,
"loss": 0.5403,
"step": 352500
},
{
"epoch": 102.26,
"learning_rate": 2.44351100811124e-05,
"loss": 0.5902,
"step": 353000
},
{
"epoch": 102.4,
"learning_rate": 2.4398899188876016e-05,
"loss": 0.5792,
"step": 353500
},
{
"epoch": 102.55,
"learning_rate": 2.436268829663963e-05,
"loss": 0.6074,
"step": 354000
},
{
"epoch": 102.69,
"learning_rate": 2.4326477404403245e-05,
"loss": 0.5505,
"step": 354500
},
{
"epoch": 102.84,
"learning_rate": 2.429026651216686e-05,
"loss": 0.6083,
"step": 355000
},
{
"epoch": 102.98,
"learning_rate": 2.4254055619930477e-05,
"loss": 0.5578,
"step": 355500
},
{
"epoch": 103.13,
"learning_rate": 2.4217844727694093e-05,
"loss": 0.5082,
"step": 356000
},
{
"epoch": 103.27,
"learning_rate": 2.4181633835457706e-05,
"loss": 0.5695,
"step": 356500
},
{
"epoch": 103.42,
"learning_rate": 2.4145422943221322e-05,
"loss": 0.5668,
"step": 357000
},
{
"epoch": 103.56,
"learning_rate": 2.4109212050984935e-05,
"loss": 0.5363,
"step": 357500
},
{
"epoch": 103.71,
"learning_rate": 2.4073001158748554e-05,
"loss": 0.5375,
"step": 358000
},
{
"epoch": 103.85,
"learning_rate": 2.4036790266512167e-05,
"loss": 0.5513,
"step": 358500
},
{
"epoch": 104.0,
"learning_rate": 2.4000579374275783e-05,
"loss": 0.5869,
"step": 359000
},
{
"epoch": 104.14,
"learning_rate": 2.39643684820394e-05,
"loss": 0.55,
"step": 359500
},
{
"epoch": 104.29,
"learning_rate": 2.3928157589803012e-05,
"loss": 0.5484,
"step": 360000
},
{
"epoch": 104.43,
"learning_rate": 2.3891946697566628e-05,
"loss": 0.5442,
"step": 360500
},
{
"epoch": 104.58,
"learning_rate": 2.3855735805330244e-05,
"loss": 0.5365,
"step": 361000
},
{
"epoch": 104.72,
"learning_rate": 2.381952491309386e-05,
"loss": 0.5659,
"step": 361500
},
{
"epoch": 104.87,
"learning_rate": 2.3783314020857476e-05,
"loss": 0.5471,
"step": 362000
},
{
"epoch": 105.01,
"learning_rate": 2.374710312862109e-05,
"loss": 0.5269,
"step": 362500
},
{
"epoch": 105.16,
"learning_rate": 2.3710892236384705e-05,
"loss": 0.5456,
"step": 363000
},
{
"epoch": 105.3,
"learning_rate": 2.367468134414832e-05,
"loss": 0.526,
"step": 363500
},
{
"epoch": 105.45,
"learning_rate": 2.3638470451911937e-05,
"loss": 0.5215,
"step": 364000
},
{
"epoch": 105.59,
"learning_rate": 2.3602259559675553e-05,
"loss": 0.5163,
"step": 364500
},
{
"epoch": 105.74,
"learning_rate": 2.3566048667439166e-05,
"loss": 0.5441,
"step": 365000
},
{
"epoch": 105.88,
"learning_rate": 2.3529837775202782e-05,
"loss": 0.5098,
"step": 365500
},
{
"epoch": 106.03,
"learning_rate": 2.3493626882966395e-05,
"loss": 0.5359,
"step": 366000
},
{
"epoch": 106.17,
"learning_rate": 2.3457415990730014e-05,
"loss": 0.5015,
"step": 366500
},
{
"epoch": 106.32,
"learning_rate": 2.342120509849363e-05,
"loss": 0.4858,
"step": 367000
},
{
"epoch": 106.46,
"learning_rate": 2.3384994206257243e-05,
"loss": 0.5311,
"step": 367500
},
{
"epoch": 106.6,
"learning_rate": 2.334878331402086e-05,
"loss": 0.5061,
"step": 368000
},
{
"epoch": 106.75,
"learning_rate": 2.3312572421784472e-05,
"loss": 0.5404,
"step": 368500
},
{
"epoch": 106.89,
"learning_rate": 2.3276361529548088e-05,
"loss": 0.5108,
"step": 369000
},
{
"epoch": 107.04,
"learning_rate": 2.3240150637311704e-05,
"loss": 0.4969,
"step": 369500
},
{
"epoch": 107.18,
"learning_rate": 2.320393974507532e-05,
"loss": 0.4969,
"step": 370000
},
{
"epoch": 107.33,
"learning_rate": 2.3167728852838936e-05,
"loss": 0.5172,
"step": 370500
},
{
"epoch": 107.47,
"learning_rate": 2.313151796060255e-05,
"loss": 0.5653,
"step": 371000
},
{
"epoch": 107.62,
"learning_rate": 2.3095307068366165e-05,
"loss": 0.517,
"step": 371500
},
{
"epoch": 107.76,
"learning_rate": 2.305909617612978e-05,
"loss": 0.496,
"step": 372000
},
{
"epoch": 107.91,
"learning_rate": 2.3022885283893397e-05,
"loss": 0.5357,
"step": 372500
},
{
"epoch": 108.05,
"learning_rate": 2.2986674391657013e-05,
"loss": 0.5115,
"step": 373000
},
{
"epoch": 108.2,
"learning_rate": 2.2950463499420626e-05,
"loss": 0.4683,
"step": 373500
},
{
"epoch": 108.34,
"learning_rate": 2.2914252607184242e-05,
"loss": 0.5017,
"step": 374000
},
{
"epoch": 108.49,
"learning_rate": 2.2878041714947855e-05,
"loss": 0.479,
"step": 374500
},
{
"epoch": 108.63,
"learning_rate": 2.284183082271147e-05,
"loss": 0.4886,
"step": 375000
},
{
"epoch": 108.78,
"learning_rate": 2.280561993047509e-05,
"loss": 0.4825,
"step": 375500
},
{
"epoch": 108.92,
"learning_rate": 2.2769409038238703e-05,
"loss": 0.4878,
"step": 376000
},
{
"epoch": 109.07,
"learning_rate": 2.273319814600232e-05,
"loss": 0.4942,
"step": 376500
},
{
"epoch": 109.21,
"learning_rate": 2.2696987253765932e-05,
"loss": 0.5016,
"step": 377000
},
{
"epoch": 109.36,
"learning_rate": 2.2660776361529548e-05,
"loss": 0.4747,
"step": 377500
},
{
"epoch": 109.5,
"learning_rate": 2.2624565469293164e-05,
"loss": 0.4661,
"step": 378000
},
{
"epoch": 109.65,
"learning_rate": 2.258835457705678e-05,
"loss": 0.4792,
"step": 378500
},
{
"epoch": 109.79,
"learning_rate": 2.2552143684820396e-05,
"loss": 0.5083,
"step": 379000
},
{
"epoch": 109.94,
"learning_rate": 2.251593279258401e-05,
"loss": 0.5082,
"step": 379500
},
{
"epoch": 110.08,
"learning_rate": 2.2479721900347625e-05,
"loss": 0.4781,
"step": 380000
},
{
"epoch": 110.23,
"learning_rate": 2.244351100811124e-05,
"loss": 0.4532,
"step": 380500
},
{
"epoch": 110.37,
"learning_rate": 2.2407300115874857e-05,
"loss": 0.4799,
"step": 381000
},
{
"epoch": 110.52,
"learning_rate": 2.2371089223638473e-05,
"loss": 0.47,
"step": 381500
},
{
"epoch": 110.66,
"learning_rate": 2.2334878331402086e-05,
"loss": 0.4906,
"step": 382000
},
{
"epoch": 110.81,
"learning_rate": 2.2298667439165702e-05,
"loss": 0.5021,
"step": 382500
},
{
"epoch": 110.95,
"learning_rate": 2.226245654692932e-05,
"loss": 0.505,
"step": 383000
},
{
"epoch": 111.1,
"learning_rate": 2.222624565469293e-05,
"loss": 0.4447,
"step": 383500
},
{
"epoch": 111.24,
"learning_rate": 2.219003476245655e-05,
"loss": 0.4363,
"step": 384000
},
{
"epoch": 111.38,
"learning_rate": 2.2153823870220163e-05,
"loss": 0.4352,
"step": 384500
},
{
"epoch": 111.53,
"learning_rate": 2.211761297798378e-05,
"loss": 0.4598,
"step": 385000
},
{
"epoch": 111.67,
"learning_rate": 2.2081402085747392e-05,
"loss": 0.4937,
"step": 385500
},
{
"epoch": 111.82,
"learning_rate": 2.2045191193511008e-05,
"loss": 0.5125,
"step": 386000
},
{
"epoch": 111.96,
"learning_rate": 2.2008980301274624e-05,
"loss": 0.446,
"step": 386500
},
{
"epoch": 112.11,
"learning_rate": 2.197276940903824e-05,
"loss": 0.4852,
"step": 387000
},
{
"epoch": 112.25,
"learning_rate": 2.1936558516801856e-05,
"loss": 0.4505,
"step": 387500
},
{
"epoch": 112.4,
"learning_rate": 2.190034762456547e-05,
"loss": 0.4709,
"step": 388000
},
{
"epoch": 112.54,
"learning_rate": 2.1864136732329085e-05,
"loss": 0.4521,
"step": 388500
},
{
"epoch": 112.69,
"learning_rate": 2.18279258400927e-05,
"loss": 0.4275,
"step": 389000
},
{
"epoch": 112.83,
"learning_rate": 2.1791714947856314e-05,
"loss": 0.4625,
"step": 389500
},
{
"epoch": 112.98,
"learning_rate": 2.1755504055619934e-05,
"loss": 0.4842,
"step": 390000
},
{
"epoch": 113.12,
"learning_rate": 2.1719293163383546e-05,
"loss": 0.4591,
"step": 390500
},
{
"epoch": 113.27,
"learning_rate": 2.1683082271147162e-05,
"loss": 0.4878,
"step": 391000
},
{
"epoch": 113.41,
"learning_rate": 2.164687137891078e-05,
"loss": 0.4352,
"step": 391500
},
{
"epoch": 113.56,
"learning_rate": 2.161066048667439e-05,
"loss": 0.4387,
"step": 392000
},
{
"epoch": 113.7,
"learning_rate": 2.157444959443801e-05,
"loss": 0.4172,
"step": 392500
},
{
"epoch": 113.85,
"learning_rate": 2.1538238702201623e-05,
"loss": 0.4304,
"step": 393000
},
{
"epoch": 113.99,
"learning_rate": 2.150202780996524e-05,
"loss": 0.4296,
"step": 393500
},
{
"epoch": 114.14,
"learning_rate": 2.1465816917728852e-05,
"loss": 0.4355,
"step": 394000
},
{
"epoch": 114.28,
"learning_rate": 2.1429606025492468e-05,
"loss": 0.4185,
"step": 394500
},
{
"epoch": 114.43,
"learning_rate": 2.1393395133256084e-05,
"loss": 0.4318,
"step": 395000
},
{
"epoch": 114.57,
"learning_rate": 2.13571842410197e-05,
"loss": 0.4081,
"step": 395500
},
{
"epoch": 114.72,
"learning_rate": 2.1320973348783317e-05,
"loss": 0.4273,
"step": 396000
},
{
"epoch": 114.86,
"learning_rate": 2.128476245654693e-05,
"loss": 0.4367,
"step": 396500
},
{
"epoch": 115.01,
"learning_rate": 2.1248551564310545e-05,
"loss": 0.4666,
"step": 397000
},
{
"epoch": 115.15,
"learning_rate": 2.121234067207416e-05,
"loss": 0.4519,
"step": 397500
},
{
"epoch": 115.3,
"learning_rate": 2.1176129779837774e-05,
"loss": 0.4253,
"step": 398000
},
{
"epoch": 115.44,
"learning_rate": 2.1139918887601394e-05,
"loss": 0.4376,
"step": 398500
},
{
"epoch": 115.59,
"learning_rate": 2.1103707995365006e-05,
"loss": 0.4602,
"step": 399000
},
{
"epoch": 115.73,
"learning_rate": 2.1067497103128622e-05,
"loss": 0.4096,
"step": 399500
},
{
"epoch": 115.87,
"learning_rate": 2.103128621089224e-05,
"loss": 0.4173,
"step": 400000
},
{
"epoch": 116.02,
"learning_rate": 2.099507531865585e-05,
"loss": 0.4236,
"step": 400500
},
{
"epoch": 116.16,
"learning_rate": 2.0958864426419467e-05,
"loss": 0.3931,
"step": 401000
},
{
"epoch": 116.31,
"learning_rate": 2.0922653534183083e-05,
"loss": 0.4301,
"step": 401500
},
{
"epoch": 116.45,
"learning_rate": 2.08864426419467e-05,
"loss": 0.4355,
"step": 402000
},
{
"epoch": 116.6,
"learning_rate": 2.0850231749710312e-05,
"loss": 0.4015,
"step": 402500
},
{
"epoch": 116.74,
"learning_rate": 2.081402085747393e-05,
"loss": 0.3858,
"step": 403000
},
{
"epoch": 116.89,
"learning_rate": 2.0777809965237544e-05,
"loss": 0.4162,
"step": 403500
},
{
"epoch": 117.03,
"learning_rate": 2.074159907300116e-05,
"loss": 0.4381,
"step": 404000
},
{
"epoch": 117.18,
"learning_rate": 2.0705388180764777e-05,
"loss": 0.4383,
"step": 404500
},
{
"epoch": 117.32,
"learning_rate": 2.066917728852839e-05,
"loss": 0.3823,
"step": 405000
},
{
"epoch": 117.47,
"learning_rate": 2.0632966396292005e-05,
"loss": 0.4065,
"step": 405500
},
{
"epoch": 117.61,
"learning_rate": 2.059675550405562e-05,
"loss": 0.4262,
"step": 406000
},
{
"epoch": 117.76,
"learning_rate": 2.0560544611819234e-05,
"loss": 0.4056,
"step": 406500
},
{
"epoch": 117.9,
"learning_rate": 2.0524333719582854e-05,
"loss": 0.4442,
"step": 407000
},
{
"epoch": 118.05,
"learning_rate": 2.0488122827346466e-05,
"loss": 0.453,
"step": 407500
},
{
"epoch": 118.19,
"learning_rate": 2.0451911935110083e-05,
"loss": 0.4199,
"step": 408000
},
{
"epoch": 118.34,
"learning_rate": 2.04157010428737e-05,
"loss": 0.3881,
"step": 408500
},
{
"epoch": 118.48,
"learning_rate": 2.037949015063731e-05,
"loss": 0.4093,
"step": 409000
},
{
"epoch": 118.63,
"learning_rate": 2.0343279258400927e-05,
"loss": 0.3842,
"step": 409500
},
{
"epoch": 118.77,
"learning_rate": 2.0307068366164544e-05,
"loss": 0.3937,
"step": 410000
},
{
"epoch": 118.92,
"learning_rate": 2.027085747392816e-05,
"loss": 0.4245,
"step": 410500
},
{
"epoch": 119.06,
"learning_rate": 2.0234646581691776e-05,
"loss": 0.3956,
"step": 411000
},
{
"epoch": 119.21,
"learning_rate": 2.019843568945539e-05,
"loss": 0.3782,
"step": 411500
},
{
"epoch": 119.35,
"learning_rate": 2.0162224797219005e-05,
"loss": 0.3795,
"step": 412000
},
{
"epoch": 119.5,
"learning_rate": 2.0126013904982617e-05,
"loss": 0.3825,
"step": 412500
},
{
"epoch": 119.64,
"learning_rate": 2.0089803012746237e-05,
"loss": 0.3702,
"step": 413000
},
{
"epoch": 119.79,
"learning_rate": 2.005359212050985e-05,
"loss": 0.3629,
"step": 413500
},
{
"epoch": 119.93,
"learning_rate": 2.0017381228273466e-05,
"loss": 0.4095,
"step": 414000
},
{
"epoch": 120.08,
"learning_rate": 1.998117033603708e-05,
"loss": 0.3685,
"step": 414500
},
{
"epoch": 120.22,
"learning_rate": 1.9944959443800694e-05,
"loss": 0.4241,
"step": 415000
},
{
"epoch": 120.37,
"learning_rate": 1.990874855156431e-05,
"loss": 0.3681,
"step": 415500
},
{
"epoch": 120.51,
"learning_rate": 1.9872537659327926e-05,
"loss": 0.3791,
"step": 416000
},
{
"epoch": 120.65,
"learning_rate": 1.9836326767091543e-05,
"loss": 0.3634,
"step": 416500
},
{
"epoch": 120.8,
"learning_rate": 1.980011587485516e-05,
"loss": 0.4004,
"step": 417000
},
{
"epoch": 120.94,
"learning_rate": 1.976390498261877e-05,
"loss": 0.3745,
"step": 417500
},
{
"epoch": 121.09,
"learning_rate": 1.9727694090382387e-05,
"loss": 0.3886,
"step": 418000
},
{
"epoch": 121.23,
"learning_rate": 1.9691483198146004e-05,
"loss": 0.3633,
"step": 418500
},
{
"epoch": 121.38,
"learning_rate": 1.965527230590962e-05,
"loss": 0.3649,
"step": 419000
},
{
"epoch": 121.52,
"learning_rate": 1.9619061413673236e-05,
"loss": 0.3719,
"step": 419500
},
{
"epoch": 121.67,
"learning_rate": 1.958285052143685e-05,
"loss": 0.4032,
"step": 420000
},
{
"epoch": 121.81,
"learning_rate": 1.9546639629200465e-05,
"loss": 0.378,
"step": 420500
},
{
"epoch": 121.96,
"learning_rate": 1.9510428736964077e-05,
"loss": 0.3737,
"step": 421000
},
{
"epoch": 122.1,
"learning_rate": 1.9474217844727697e-05,
"loss": 0.3599,
"step": 421500
},
{
"epoch": 122.25,
"learning_rate": 1.943800695249131e-05,
"loss": 0.3806,
"step": 422000
},
{
"epoch": 122.39,
"learning_rate": 1.9401796060254926e-05,
"loss": 0.3456,
"step": 422500
},
{
"epoch": 122.54,
"learning_rate": 1.936558516801854e-05,
"loss": 0.3443,
"step": 423000
},
{
"epoch": 122.68,
"learning_rate": 1.9329374275782154e-05,
"loss": 0.3855,
"step": 423500
},
{
"epoch": 122.83,
"learning_rate": 1.929316338354577e-05,
"loss": 0.3958,
"step": 424000
},
{
"epoch": 122.97,
"learning_rate": 1.9256952491309387e-05,
"loss": 0.3696,
"step": 424500
},
{
"epoch": 123.12,
"learning_rate": 1.9220741599073003e-05,
"loss": 0.3616,
"step": 425000
},
{
"epoch": 123.26,
"learning_rate": 1.918453070683662e-05,
"loss": 0.356,
"step": 425500
},
{
"epoch": 123.41,
"learning_rate": 1.914831981460023e-05,
"loss": 0.4097,
"step": 426000
},
{
"epoch": 123.55,
"learning_rate": 1.9112108922363848e-05,
"loss": 0.3496,
"step": 426500
},
{
"epoch": 123.7,
"learning_rate": 1.9075898030127464e-05,
"loss": 0.3326,
"step": 427000
},
{
"epoch": 123.84,
"learning_rate": 1.903968713789108e-05,
"loss": 0.3807,
"step": 427500
},
{
"epoch": 123.99,
"learning_rate": 1.9003476245654696e-05,
"loss": 0.4034,
"step": 428000
},
{
"epoch": 124.13,
"learning_rate": 1.896726535341831e-05,
"loss": 0.3497,
"step": 428500
},
{
"epoch": 124.28,
"learning_rate": 1.8931054461181925e-05,
"loss": 0.3475,
"step": 429000
},
{
"epoch": 124.42,
"learning_rate": 1.8894843568945537e-05,
"loss": 0.3313,
"step": 429500
},
{
"epoch": 124.57,
"learning_rate": 1.8858632676709153e-05,
"loss": 0.369,
"step": 430000
},
{
"epoch": 124.71,
"learning_rate": 1.8822421784472773e-05,
"loss": 0.3487,
"step": 430500
},
{
"epoch": 124.86,
"learning_rate": 1.8786210892236386e-05,
"loss": 0.329,
"step": 431000
},
{
"epoch": 125.0,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.3298,
"step": 431500
},
{
"epoch": 125.14,
"learning_rate": 1.8713789107763614e-05,
"loss": 0.3246,
"step": 432000
},
{
"epoch": 125.29,
"learning_rate": 1.867757821552723e-05,
"loss": 0.3272,
"step": 432500
},
{
"epoch": 125.43,
"learning_rate": 1.8641367323290847e-05,
"loss": 0.3454,
"step": 433000
},
{
"epoch": 125.58,
"learning_rate": 1.8605156431054463e-05,
"loss": 0.3547,
"step": 433500
},
{
"epoch": 125.72,
"learning_rate": 1.856894553881808e-05,
"loss": 0.3334,
"step": 434000
},
{
"epoch": 125.87,
"learning_rate": 1.853273464658169e-05,
"loss": 0.3776,
"step": 434500
},
{
"epoch": 126.01,
"learning_rate": 1.8496523754345308e-05,
"loss": 0.3607,
"step": 435000
},
{
"epoch": 126.16,
"learning_rate": 1.8460312862108924e-05,
"loss": 0.329,
"step": 435500
},
{
"epoch": 126.3,
"learning_rate": 1.842410196987254e-05,
"loss": 0.33,
"step": 436000
},
{
"epoch": 126.45,
"learning_rate": 1.8387891077636156e-05,
"loss": 0.3466,
"step": 436500
},
{
"epoch": 126.59,
"learning_rate": 1.835168018539977e-05,
"loss": 0.3597,
"step": 437000
},
{
"epoch": 126.74,
"learning_rate": 1.8315469293163385e-05,
"loss": 0.3248,
"step": 437500
},
{
"epoch": 126.88,
"learning_rate": 1.8279258400926997e-05,
"loss": 0.3396,
"step": 438000
},
{
"epoch": 127.03,
"learning_rate": 1.8243047508690614e-05,
"loss": 0.3413,
"step": 438500
},
{
"epoch": 127.17,
"learning_rate": 1.8206836616454233e-05,
"loss": 0.3249,
"step": 439000
},
{
"epoch": 127.32,
"learning_rate": 1.8170625724217846e-05,
"loss": 0.3022,
"step": 439500
},
{
"epoch": 127.46,
"learning_rate": 1.8134414831981462e-05,
"loss": 0.308,
"step": 440000
},
{
"epoch": 127.61,
"learning_rate": 1.8098203939745075e-05,
"loss": 0.3446,
"step": 440500
},
{
"epoch": 127.75,
"learning_rate": 1.806199304750869e-05,
"loss": 0.3335,
"step": 441000
},
{
"epoch": 127.9,
"learning_rate": 1.8025782155272307e-05,
"loss": 0.335,
"step": 441500
},
{
"epoch": 128.04,
"learning_rate": 1.7989571263035923e-05,
"loss": 0.3059,
"step": 442000
},
{
"epoch": 128.19,
"learning_rate": 1.795336037079954e-05,
"loss": 0.3314,
"step": 442500
},
{
"epoch": 128.33,
"learning_rate": 1.791714947856315e-05,
"loss": 0.3004,
"step": 443000
},
{
"epoch": 128.48,
"learning_rate": 1.7880938586326768e-05,
"loss": 0.3249,
"step": 443500
},
{
"epoch": 128.62,
"learning_rate": 1.7844727694090384e-05,
"loss": 0.3198,
"step": 444000
},
{
"epoch": 128.77,
"learning_rate": 1.7808516801853997e-05,
"loss": 0.3394,
"step": 444500
},
{
"epoch": 128.91,
"learning_rate": 1.7772305909617616e-05,
"loss": 0.3075,
"step": 445000
},
{
"epoch": 129.06,
"learning_rate": 1.773609501738123e-05,
"loss": 0.334,
"step": 445500
},
{
"epoch": 129.2,
"learning_rate": 1.7699884125144845e-05,
"loss": 0.306,
"step": 446000
},
{
"epoch": 129.35,
"learning_rate": 1.766367323290846e-05,
"loss": 0.3146,
"step": 446500
},
{
"epoch": 129.49,
"learning_rate": 1.7627462340672074e-05,
"loss": 0.3068,
"step": 447000
},
{
"epoch": 129.63,
"learning_rate": 1.7591251448435693e-05,
"loss": 0.3124,
"step": 447500
},
{
"epoch": 129.78,
"learning_rate": 1.7555040556199306e-05,
"loss": 0.3082,
"step": 448000
},
{
"epoch": 129.92,
"learning_rate": 1.7518829663962922e-05,
"loss": 0.3215,
"step": 448500
},
{
"epoch": 130.07,
"learning_rate": 1.7482618771726535e-05,
"loss": 0.3114,
"step": 449000
},
{
"epoch": 130.21,
"learning_rate": 1.744640787949015e-05,
"loss": 0.3228,
"step": 449500
},
{
"epoch": 130.36,
"learning_rate": 1.7410196987253767e-05,
"loss": 0.3281,
"step": 450000
},
{
"epoch": 130.5,
"learning_rate": 1.7373986095017383e-05,
"loss": 0.2865,
"step": 450500
},
{
"epoch": 130.65,
"learning_rate": 1.7337775202781e-05,
"loss": 0.3214,
"step": 451000
},
{
"epoch": 130.79,
"learning_rate": 1.730156431054461e-05,
"loss": 0.3105,
"step": 451500
},
{
"epoch": 130.94,
"learning_rate": 1.7265353418308228e-05,
"loss": 0.3095,
"step": 452000
},
{
"epoch": 131.08,
"learning_rate": 1.7229142526071844e-05,
"loss": 0.3034,
"step": 452500
},
{
"epoch": 131.23,
"learning_rate": 1.7192931633835457e-05,
"loss": 0.3057,
"step": 453000
},
{
"epoch": 131.37,
"learning_rate": 1.7156720741599076e-05,
"loss": 0.2957,
"step": 453500
},
{
"epoch": 131.52,
"learning_rate": 1.712050984936269e-05,
"loss": 0.312,
"step": 454000
},
{
"epoch": 131.66,
"learning_rate": 1.7084298957126305e-05,
"loss": 0.29,
"step": 454500
},
{
"epoch": 131.81,
"learning_rate": 1.704808806488992e-05,
"loss": 0.2816,
"step": 455000
},
{
"epoch": 131.95,
"learning_rate": 1.7011877172653534e-05,
"loss": 0.3185,
"step": 455500
},
{
"epoch": 132.1,
"learning_rate": 1.697566628041715e-05,
"loss": 0.298,
"step": 456000
},
{
"epoch": 132.24,
"learning_rate": 1.6939455388180766e-05,
"loss": 0.2794,
"step": 456500
},
{
"epoch": 132.39,
"learning_rate": 1.6903244495944382e-05,
"loss": 0.29,
"step": 457000
},
{
"epoch": 132.53,
"learning_rate": 1.6867033603707995e-05,
"loss": 0.3004,
"step": 457500
},
{
"epoch": 132.68,
"learning_rate": 1.683082271147161e-05,
"loss": 0.3047,
"step": 458000
},
{
"epoch": 132.82,
"learning_rate": 1.6794611819235227e-05,
"loss": 0.3057,
"step": 458500
},
{
"epoch": 132.97,
"learning_rate": 1.675840092699884e-05,
"loss": 0.2959,
"step": 459000
},
{
"epoch": 133.11,
"learning_rate": 1.672219003476246e-05,
"loss": 0.317,
"step": 459500
},
{
"epoch": 133.26,
"learning_rate": 1.6685979142526072e-05,
"loss": 0.3087,
"step": 460000
},
{
"epoch": 133.4,
"learning_rate": 1.6649768250289688e-05,
"loss": 0.2817,
"step": 460500
},
{
"epoch": 133.55,
"learning_rate": 1.6613557358053304e-05,
"loss": 0.2774,
"step": 461000
},
{
"epoch": 133.69,
"learning_rate": 1.6577346465816917e-05,
"loss": 0.2932,
"step": 461500
},
{
"epoch": 133.84,
"learning_rate": 1.6541135573580536e-05,
"loss": 0.2784,
"step": 462000
},
{
"epoch": 133.98,
"learning_rate": 1.650492468134415e-05,
"loss": 0.3047,
"step": 462500
},
{
"epoch": 134.13,
"learning_rate": 1.6468713789107765e-05,
"loss": 0.2939,
"step": 463000
},
{
"epoch": 134.27,
"learning_rate": 1.643250289687138e-05,
"loss": 0.2729,
"step": 463500
},
{
"epoch": 134.41,
"learning_rate": 1.6396292004634994e-05,
"loss": 0.2736,
"step": 464000
},
{
"epoch": 134.56,
"learning_rate": 1.636008111239861e-05,
"loss": 0.2993,
"step": 464500
},
{
"epoch": 134.7,
"learning_rate": 1.6323870220162226e-05,
"loss": 0.285,
"step": 465000
},
{
"epoch": 134.85,
"learning_rate": 1.6287659327925842e-05,
"loss": 0.2693,
"step": 465500
},
{
"epoch": 134.99,
"learning_rate": 1.6251448435689455e-05,
"loss": 0.2976,
"step": 466000
},
{
"epoch": 135.14,
"learning_rate": 1.621523754345307e-05,
"loss": 0.2804,
"step": 466500
},
{
"epoch": 135.28,
"learning_rate": 1.6179026651216687e-05,
"loss": 0.2843,
"step": 467000
},
{
"epoch": 135.43,
"learning_rate": 1.61428157589803e-05,
"loss": 0.2744,
"step": 467500
},
{
"epoch": 135.57,
"learning_rate": 1.610660486674392e-05,
"loss": 0.2793,
"step": 468000
},
{
"epoch": 135.72,
"learning_rate": 1.6070393974507532e-05,
"loss": 0.285,
"step": 468500
},
{
"epoch": 135.86,
"learning_rate": 1.6034183082271148e-05,
"loss": 0.2775,
"step": 469000
},
{
"epoch": 136.01,
"learning_rate": 1.5997972190034764e-05,
"loss": 0.2836,
"step": 469500
},
{
"epoch": 136.15,
"learning_rate": 1.5961761297798377e-05,
"loss": 0.2635,
"step": 470000
},
{
"epoch": 136.3,
"learning_rate": 1.5925550405561993e-05,
"loss": 0.2745,
"step": 470500
},
{
"epoch": 136.44,
"learning_rate": 1.588933951332561e-05,
"loss": 0.264,
"step": 471000
},
{
"epoch": 136.59,
"learning_rate": 1.5853128621089225e-05,
"loss": 0.2761,
"step": 471500
},
{
"epoch": 136.73,
"learning_rate": 1.581691772885284e-05,
"loss": 0.2966,
"step": 472000
},
{
"epoch": 136.88,
"learning_rate": 1.5780706836616454e-05,
"loss": 0.2563,
"step": 472500
},
{
"epoch": 137.02,
"learning_rate": 1.574449594438007e-05,
"loss": 0.3046,
"step": 473000
},
{
"epoch": 137.17,
"learning_rate": 1.5708285052143686e-05,
"loss": 0.2558,
"step": 473500
},
{
"epoch": 137.31,
"learning_rate": 1.5672074159907302e-05,
"loss": 0.2572,
"step": 474000
},
{
"epoch": 137.46,
"learning_rate": 1.5635863267670918e-05,
"loss": 0.2545,
"step": 474500
},
{
"epoch": 137.6,
"learning_rate": 1.559965237543453e-05,
"loss": 0.2851,
"step": 475000
},
{
"epoch": 137.75,
"learning_rate": 1.5563441483198147e-05,
"loss": 0.2573,
"step": 475500
},
{
"epoch": 137.89,
"learning_rate": 1.552723059096176e-05,
"loss": 0.3079,
"step": 476000
},
{
"epoch": 138.04,
"learning_rate": 1.549101969872538e-05,
"loss": 0.2508,
"step": 476500
},
{
"epoch": 138.18,
"learning_rate": 1.5454808806488992e-05,
"loss": 0.2452,
"step": 477000
},
{
"epoch": 138.33,
"learning_rate": 1.5418597914252608e-05,
"loss": 0.2633,
"step": 477500
},
{
"epoch": 138.47,
"learning_rate": 1.5382387022016224e-05,
"loss": 0.2569,
"step": 478000
},
{
"epoch": 138.62,
"learning_rate": 1.5346176129779837e-05,
"loss": 0.2591,
"step": 478500
},
{
"epoch": 138.76,
"learning_rate": 1.5309965237543453e-05,
"loss": 0.272,
"step": 479000
},
{
"epoch": 138.9,
"learning_rate": 1.527375434530707e-05,
"loss": 0.2495,
"step": 479500
},
{
"epoch": 139.05,
"learning_rate": 1.5237543453070685e-05,
"loss": 0.2988,
"step": 480000
},
{
"epoch": 139.19,
"learning_rate": 1.52013325608343e-05,
"loss": 0.255,
"step": 480500
},
{
"epoch": 139.34,
"learning_rate": 1.5165121668597914e-05,
"loss": 0.2734,
"step": 481000
},
{
"epoch": 139.48,
"learning_rate": 1.512891077636153e-05,
"loss": 0.2574,
"step": 481500
},
{
"epoch": 139.63,
"learning_rate": 1.5092699884125144e-05,
"loss": 0.2649,
"step": 482000
},
{
"epoch": 139.77,
"learning_rate": 1.5056488991888762e-05,
"loss": 0.2445,
"step": 482500
},
{
"epoch": 139.92,
"learning_rate": 1.5020278099652377e-05,
"loss": 0.2909,
"step": 483000
},
{
"epoch": 140.06,
"learning_rate": 1.4984067207415991e-05,
"loss": 0.2669,
"step": 483500
},
{
"epoch": 140.21,
"learning_rate": 1.4947856315179607e-05,
"loss": 0.2534,
"step": 484000
},
{
"epoch": 140.35,
"learning_rate": 1.4911645422943221e-05,
"loss": 0.2586,
"step": 484500
},
{
"epoch": 140.5,
"learning_rate": 1.4875434530706836e-05,
"loss": 0.2407,
"step": 485000
},
{
"epoch": 140.64,
"learning_rate": 1.4839223638470454e-05,
"loss": 0.2306,
"step": 485500
},
{
"epoch": 140.79,
"learning_rate": 1.4803012746234068e-05,
"loss": 0.2588,
"step": 486000
},
{
"epoch": 140.93,
"learning_rate": 1.4766801853997682e-05,
"loss": 0.249,
"step": 486500
},
{
"epoch": 141.08,
"learning_rate": 1.4730590961761299e-05,
"loss": 0.2863,
"step": 487000
},
{
"epoch": 141.22,
"learning_rate": 1.4694380069524913e-05,
"loss": 0.2298,
"step": 487500
},
{
"epoch": 141.37,
"learning_rate": 1.465816917728853e-05,
"loss": 0.226,
"step": 488000
},
{
"epoch": 141.51,
"learning_rate": 1.4621958285052145e-05,
"loss": 0.2447,
"step": 488500
},
{
"epoch": 141.66,
"learning_rate": 1.458574739281576e-05,
"loss": 0.2686,
"step": 489000
},
{
"epoch": 141.8,
"learning_rate": 1.4549536500579376e-05,
"loss": 0.2503,
"step": 489500
},
{
"epoch": 141.95,
"learning_rate": 1.451332560834299e-05,
"loss": 0.2746,
"step": 490000
},
{
"epoch": 142.09,
"learning_rate": 1.4477114716106604e-05,
"loss": 0.2151,
"step": 490500
},
{
"epoch": 142.24,
"learning_rate": 1.4440903823870222e-05,
"loss": 0.2437,
"step": 491000
},
{
"epoch": 142.38,
"learning_rate": 1.4404692931633837e-05,
"loss": 0.2303,
"step": 491500
},
{
"epoch": 142.53,
"learning_rate": 1.4368482039397451e-05,
"loss": 0.2443,
"step": 492000
},
{
"epoch": 142.67,
"learning_rate": 1.4332271147161067e-05,
"loss": 0.2637,
"step": 492500
},
{
"epoch": 142.82,
"learning_rate": 1.4296060254924682e-05,
"loss": 0.2514,
"step": 493000
},
{
"epoch": 142.96,
"learning_rate": 1.4259849362688296e-05,
"loss": 0.2576,
"step": 493500
},
{
"epoch": 143.11,
"learning_rate": 1.4223638470451914e-05,
"loss": 0.2524,
"step": 494000
},
{
"epoch": 143.25,
"learning_rate": 1.4187427578215528e-05,
"loss": 0.2366,
"step": 494500
},
{
"epoch": 143.4,
"learning_rate": 1.4151216685979144e-05,
"loss": 0.2462,
"step": 495000
},
{
"epoch": 143.54,
"learning_rate": 1.4115005793742759e-05,
"loss": 0.2367,
"step": 495500
},
{
"epoch": 143.68,
"learning_rate": 1.4078794901506373e-05,
"loss": 0.2321,
"step": 496000
},
{
"epoch": 143.83,
"learning_rate": 1.4042584009269987e-05,
"loss": 0.2467,
"step": 496500
},
{
"epoch": 143.97,
"learning_rate": 1.4006373117033605e-05,
"loss": 0.229,
"step": 497000
},
{
"epoch": 144.12,
"learning_rate": 1.397016222479722e-05,
"loss": 0.237,
"step": 497500
},
{
"epoch": 144.26,
"learning_rate": 1.3933951332560836e-05,
"loss": 0.2555,
"step": 498000
},
{
"epoch": 144.41,
"learning_rate": 1.389774044032445e-05,
"loss": 0.2545,
"step": 498500
},
{
"epoch": 144.55,
"learning_rate": 1.3861529548088065e-05,
"loss": 0.2443,
"step": 499000
},
{
"epoch": 144.7,
"learning_rate": 1.3825318655851679e-05,
"loss": 0.2723,
"step": 499500
},
{
"epoch": 144.84,
"learning_rate": 1.3789107763615297e-05,
"loss": 0.2444,
"step": 500000
},
{
"epoch": 144.99,
"learning_rate": 1.3752896871378911e-05,
"loss": 0.2326,
"step": 500500
},
{
"epoch": 145.13,
"learning_rate": 1.3716685979142527e-05,
"loss": 0.2243,
"step": 501000
},
{
"epoch": 145.28,
"learning_rate": 1.3680475086906142e-05,
"loss": 0.2313,
"step": 501500
},
{
"epoch": 145.42,
"learning_rate": 1.3644264194669756e-05,
"loss": 0.2395,
"step": 502000
},
{
"epoch": 145.57,
"learning_rate": 1.3608053302433374e-05,
"loss": 0.2327,
"step": 502500
},
{
"epoch": 145.71,
"learning_rate": 1.3571842410196988e-05,
"loss": 0.2246,
"step": 503000
},
{
"epoch": 145.86,
"learning_rate": 1.3535631517960604e-05,
"loss": 0.2169,
"step": 503500
},
{
"epoch": 146.0,
"learning_rate": 1.3499420625724219e-05,
"loss": 0.2386,
"step": 504000
},
{
"epoch": 146.15,
"learning_rate": 1.3463209733487833e-05,
"loss": 0.238,
"step": 504500
},
{
"epoch": 146.29,
"learning_rate": 1.3426998841251448e-05,
"loss": 0.2387,
"step": 505000
},
{
"epoch": 146.44,
"learning_rate": 1.3390787949015065e-05,
"loss": 0.2223,
"step": 505500
},
{
"epoch": 146.58,
"learning_rate": 1.335457705677868e-05,
"loss": 0.2103,
"step": 506000
},
{
"epoch": 146.73,
"learning_rate": 1.3318366164542296e-05,
"loss": 0.2209,
"step": 506500
},
{
"epoch": 146.87,
"learning_rate": 1.328215527230591e-05,
"loss": 0.2444,
"step": 507000
},
{
"epoch": 147.02,
"learning_rate": 1.3245944380069525e-05,
"loss": 0.2337,
"step": 507500
},
{
"epoch": 147.16,
"learning_rate": 1.3209733487833139e-05,
"loss": 0.2242,
"step": 508000
},
{
"epoch": 147.31,
"learning_rate": 1.3173522595596757e-05,
"loss": 0.2305,
"step": 508500
},
{
"epoch": 147.45,
"learning_rate": 1.3137311703360373e-05,
"loss": 0.216,
"step": 509000
},
{
"epoch": 147.6,
"learning_rate": 1.3101100811123987e-05,
"loss": 0.2132,
"step": 509500
},
{
"epoch": 147.74,
"learning_rate": 1.3064889918887602e-05,
"loss": 0.2571,
"step": 510000
},
{
"epoch": 147.89,
"learning_rate": 1.3028679026651216e-05,
"loss": 0.2287,
"step": 510500
},
{
"epoch": 148.03,
"learning_rate": 1.299246813441483e-05,
"loss": 0.2344,
"step": 511000
},
{
"epoch": 148.17,
"learning_rate": 1.2956257242178448e-05,
"loss": 0.2202,
"step": 511500
},
{
"epoch": 148.32,
"learning_rate": 1.2920046349942064e-05,
"loss": 0.1859,
"step": 512000
},
{
"epoch": 148.46,
"learning_rate": 1.2883835457705679e-05,
"loss": 0.1972,
"step": 512500
},
{
"epoch": 148.61,
"learning_rate": 1.2847624565469293e-05,
"loss": 0.2232,
"step": 513000
},
{
"epoch": 148.75,
"learning_rate": 1.2811413673232908e-05,
"loss": 0.2051,
"step": 513500
},
{
"epoch": 148.9,
"learning_rate": 1.2775202780996524e-05,
"loss": 0.2272,
"step": 514000
},
{
"epoch": 149.04,
"learning_rate": 1.273899188876014e-05,
"loss": 0.1866,
"step": 514500
},
{
"epoch": 149.19,
"learning_rate": 1.2702780996523756e-05,
"loss": 0.1998,
"step": 515000
},
{
"epoch": 149.33,
"learning_rate": 1.266657010428737e-05,
"loss": 0.1948,
"step": 515500
},
{
"epoch": 149.48,
"learning_rate": 1.2630359212050985e-05,
"loss": 0.2025,
"step": 516000
},
{
"epoch": 149.62,
"learning_rate": 1.2594148319814599e-05,
"loss": 0.2197,
"step": 516500
},
{
"epoch": 149.77,
"learning_rate": 1.2557937427578217e-05,
"loss": 0.2142,
"step": 517000
},
{
"epoch": 149.91,
"learning_rate": 1.2521726535341833e-05,
"loss": 0.2137,
"step": 517500
},
{
"epoch": 150.06,
"learning_rate": 1.2485515643105447e-05,
"loss": 0.2164,
"step": 518000
},
{
"epoch": 150.2,
"learning_rate": 1.2449304750869062e-05,
"loss": 0.1961,
"step": 518500
},
{
"epoch": 150.35,
"learning_rate": 1.2413093858632676e-05,
"loss": 0.2187,
"step": 519000
},
{
"epoch": 150.49,
"learning_rate": 1.2376882966396292e-05,
"loss": 0.1937,
"step": 519500
},
{
"epoch": 150.64,
"learning_rate": 1.2340672074159908e-05,
"loss": 0.2133,
"step": 520000
},
{
"epoch": 150.78,
"learning_rate": 1.2304461181923523e-05,
"loss": 0.2468,
"step": 520500
},
{
"epoch": 150.93,
"learning_rate": 1.2268250289687139e-05,
"loss": 0.2023,
"step": 521000
},
{
"epoch": 151.07,
"learning_rate": 1.2232039397450753e-05,
"loss": 0.2029,
"step": 521500
},
{
"epoch": 151.22,
"learning_rate": 1.219582850521437e-05,
"loss": 0.2073,
"step": 522000
},
{
"epoch": 151.36,
"learning_rate": 1.2159617612977984e-05,
"loss": 0.2003,
"step": 522500
},
{
"epoch": 151.51,
"learning_rate": 1.21234067207416e-05,
"loss": 0.2107,
"step": 523000
},
{
"epoch": 151.65,
"learning_rate": 1.2087195828505216e-05,
"loss": 0.2126,
"step": 523500
},
{
"epoch": 151.8,
"learning_rate": 1.205098493626883e-05,
"loss": 0.2001,
"step": 524000
},
{
"epoch": 151.94,
"learning_rate": 1.2014774044032445e-05,
"loss": 0.228,
"step": 524500
},
{
"epoch": 152.09,
"learning_rate": 1.1978563151796061e-05,
"loss": 0.2013,
"step": 525000
},
{
"epoch": 152.23,
"learning_rate": 1.1942352259559677e-05,
"loss": 0.2065,
"step": 525500
},
{
"epoch": 152.38,
"learning_rate": 1.1906141367323291e-05,
"loss": 0.1917,
"step": 526000
},
{
"epoch": 152.52,
"learning_rate": 1.1869930475086907e-05,
"loss": 0.2089,
"step": 526500
},
{
"epoch": 152.67,
"learning_rate": 1.1833719582850522e-05,
"loss": 0.1965,
"step": 527000
},
{
"epoch": 152.81,
"learning_rate": 1.1797508690614136e-05,
"loss": 0.2153,
"step": 527500
},
{
"epoch": 152.95,
"learning_rate": 1.1761297798377752e-05,
"loss": 0.1984,
"step": 528000
},
{
"epoch": 153.1,
"learning_rate": 1.1725086906141368e-05,
"loss": 0.1944,
"step": 528500
},
{
"epoch": 153.24,
"learning_rate": 1.1688876013904983e-05,
"loss": 0.2069,
"step": 529000
},
{
"epoch": 153.39,
"learning_rate": 1.1652665121668599e-05,
"loss": 0.1943,
"step": 529500
},
{
"epoch": 153.53,
"learning_rate": 1.1616454229432213e-05,
"loss": 0.1839,
"step": 530000
},
{
"epoch": 153.68,
"learning_rate": 1.1580243337195828e-05,
"loss": 0.2062,
"step": 530500
},
{
"epoch": 153.82,
"learning_rate": 1.1544032444959446e-05,
"loss": 0.2067,
"step": 531000
},
{
"epoch": 153.97,
"learning_rate": 1.150782155272306e-05,
"loss": 0.2113,
"step": 531500
},
{
"epoch": 154.11,
"learning_rate": 1.1471610660486674e-05,
"loss": 0.2214,
"step": 532000
},
{
"epoch": 154.26,
"learning_rate": 1.143539976825029e-05,
"loss": 0.203,
"step": 532500
},
{
"epoch": 154.4,
"learning_rate": 1.1399188876013905e-05,
"loss": 0.1725,
"step": 533000
},
{
"epoch": 154.55,
"learning_rate": 1.1362977983777521e-05,
"loss": 0.1898,
"step": 533500
},
{
"epoch": 154.69,
"learning_rate": 1.1326767091541137e-05,
"loss": 0.2152,
"step": 534000
},
{
"epoch": 154.84,
"learning_rate": 1.1290556199304751e-05,
"loss": 0.1984,
"step": 534500
},
{
"epoch": 154.98,
"learning_rate": 1.1254345307068366e-05,
"loss": 0.2074,
"step": 535000
},
{
"epoch": 155.13,
"learning_rate": 1.1218134414831982e-05,
"loss": 0.203,
"step": 535500
},
{
"epoch": 155.27,
"learning_rate": 1.1181923522595596e-05,
"loss": 0.2135,
"step": 536000
},
{
"epoch": 155.42,
"learning_rate": 1.1145712630359212e-05,
"loss": 0.191,
"step": 536500
},
{
"epoch": 155.56,
"learning_rate": 1.1109501738122829e-05,
"loss": 0.1882,
"step": 537000
},
{
"epoch": 155.71,
"learning_rate": 1.1073290845886443e-05,
"loss": 0.1999,
"step": 537500
},
{
"epoch": 155.85,
"learning_rate": 1.1037079953650059e-05,
"loss": 0.1769,
"step": 538000
},
{
"epoch": 156.0,
"learning_rate": 1.1000869061413673e-05,
"loss": 0.1962,
"step": 538500
},
{
"epoch": 156.14,
"learning_rate": 1.096465816917729e-05,
"loss": 0.1978,
"step": 539000
},
{
"epoch": 156.29,
"learning_rate": 1.0928447276940906e-05,
"loss": 0.1693,
"step": 539500
},
{
"epoch": 156.43,
"learning_rate": 1.089223638470452e-05,
"loss": 0.1777,
"step": 540000
},
{
"epoch": 156.58,
"learning_rate": 1.0856025492468134e-05,
"loss": 0.1803,
"step": 540500
},
{
"epoch": 156.72,
"learning_rate": 1.081981460023175e-05,
"loss": 0.1683,
"step": 541000
},
{
"epoch": 156.87,
"learning_rate": 1.0783603707995365e-05,
"loss": 0.1918,
"step": 541500
},
{
"epoch": 157.01,
"learning_rate": 1.0747392815758981e-05,
"loss": 0.1938,
"step": 542000
},
{
"epoch": 157.16,
"learning_rate": 1.0711181923522597e-05,
"loss": 0.1879,
"step": 542500
},
{
"epoch": 157.3,
"learning_rate": 1.0674971031286211e-05,
"loss": 0.1927,
"step": 543000
},
{
"epoch": 157.44,
"learning_rate": 1.0638760139049826e-05,
"loss": 0.1724,
"step": 543500
},
{
"epoch": 157.59,
"learning_rate": 1.0602549246813442e-05,
"loss": 0.1943,
"step": 544000
},
{
"epoch": 157.73,
"learning_rate": 1.0566338354577056e-05,
"loss": 0.172,
"step": 544500
},
{
"epoch": 157.88,
"learning_rate": 1.0530127462340672e-05,
"loss": 0.1979,
"step": 545000
},
{
"epoch": 158.02,
"learning_rate": 1.0493916570104289e-05,
"loss": 0.2234,
"step": 545500
},
{
"epoch": 158.17,
"learning_rate": 1.0457705677867903e-05,
"loss": 0.1712,
"step": 546000
},
{
"epoch": 158.31,
"learning_rate": 1.0421494785631517e-05,
"loss": 0.2199,
"step": 546500
},
{
"epoch": 158.46,
"learning_rate": 1.0385283893395133e-05,
"loss": 0.1846,
"step": 547000
},
{
"epoch": 158.6,
"learning_rate": 1.034907300115875e-05,
"loss": 0.1832,
"step": 547500
},
{
"epoch": 158.75,
"learning_rate": 1.0312862108922364e-05,
"loss": 0.1684,
"step": 548000
},
{
"epoch": 158.89,
"learning_rate": 1.027665121668598e-05,
"loss": 0.1741,
"step": 548500
},
{
"epoch": 159.04,
"learning_rate": 1.0240440324449594e-05,
"loss": 0.1623,
"step": 549000
},
{
"epoch": 159.18,
"learning_rate": 1.0204229432213209e-05,
"loss": 0.1776,
"step": 549500
},
{
"epoch": 159.33,
"learning_rate": 1.0168018539976825e-05,
"loss": 0.1863,
"step": 550000
},
{
"epoch": 159.47,
"learning_rate": 1.0131807647740441e-05,
"loss": 0.1685,
"step": 550500
},
{
"epoch": 159.62,
"learning_rate": 1.0095596755504057e-05,
"loss": 0.1973,
"step": 551000
},
{
"epoch": 159.76,
"learning_rate": 1.0059385863267672e-05,
"loss": 0.1711,
"step": 551500
},
{
"epoch": 159.91,
"learning_rate": 1.0023174971031286e-05,
"loss": 0.1814,
"step": 552000
},
{
"epoch": 160.05,
"learning_rate": 9.986964078794902e-06,
"loss": 0.167,
"step": 552500
},
{
"epoch": 160.2,
"learning_rate": 9.950753186558518e-06,
"loss": 0.1938,
"step": 553000
},
{
"epoch": 160.34,
"learning_rate": 9.914542294322133e-06,
"loss": 0.167,
"step": 553500
},
{
"epoch": 160.49,
"learning_rate": 9.878331402085749e-06,
"loss": 0.1617,
"step": 554000
},
{
"epoch": 160.63,
"learning_rate": 9.842120509849363e-06,
"loss": 0.172,
"step": 554500
},
{
"epoch": 160.78,
"learning_rate": 9.805909617612977e-06,
"loss": 0.1737,
"step": 555000
},
{
"epoch": 160.92,
"learning_rate": 9.769698725376594e-06,
"loss": 0.1816,
"step": 555500
},
{
"epoch": 161.07,
"learning_rate": 9.73348783314021e-06,
"loss": 0.1628,
"step": 556000
},
{
"epoch": 161.21,
"learning_rate": 9.697276940903824e-06,
"loss": 0.1696,
"step": 556500
},
{
"epoch": 161.36,
"learning_rate": 9.66106604866744e-06,
"loss": 0.1656,
"step": 557000
},
{
"epoch": 161.5,
"learning_rate": 9.624855156431055e-06,
"loss": 0.1559,
"step": 557500
},
{
"epoch": 161.65,
"learning_rate": 9.588644264194669e-06,
"loss": 0.172,
"step": 558000
},
{
"epoch": 161.79,
"learning_rate": 9.552433371958287e-06,
"loss": 0.1878,
"step": 558500
},
{
"epoch": 161.94,
"learning_rate": 9.516222479721901e-06,
"loss": 0.1682,
"step": 559000
},
{
"epoch": 162.08,
"learning_rate": 9.480011587485516e-06,
"loss": 0.1755,
"step": 559500
},
{
"epoch": 162.22,
"learning_rate": 9.443800695249132e-06,
"loss": 0.2092,
"step": 560000
},
{
"epoch": 162.37,
"learning_rate": 9.407589803012746e-06,
"loss": 0.1784,
"step": 560500
},
{
"epoch": 162.51,
"learning_rate": 9.371378910776362e-06,
"loss": 0.154,
"step": 561000
},
{
"epoch": 162.66,
"learning_rate": 9.335168018539978e-06,
"loss": 0.1892,
"step": 561500
},
{
"epoch": 162.8,
"learning_rate": 9.298957126303593e-06,
"loss": 0.1679,
"step": 562000
},
{
"epoch": 162.95,
"learning_rate": 9.262746234067207e-06,
"loss": 0.1699,
"step": 562500
},
{
"epoch": 163.09,
"learning_rate": 9.226535341830823e-06,
"loss": 0.1553,
"step": 563000
},
{
"epoch": 163.24,
"learning_rate": 9.190324449594438e-06,
"loss": 0.1792,
"step": 563500
},
{
"epoch": 163.38,
"learning_rate": 9.154113557358054e-06,
"loss": 0.1611,
"step": 564000
},
{
"epoch": 163.53,
"learning_rate": 9.11790266512167e-06,
"loss": 0.1682,
"step": 564500
},
{
"epoch": 163.67,
"learning_rate": 9.081691772885284e-06,
"loss": 0.1765,
"step": 565000
},
{
"epoch": 163.82,
"learning_rate": 9.0454808806489e-06,
"loss": 0.1928,
"step": 565500
},
{
"epoch": 163.96,
"learning_rate": 9.009269988412515e-06,
"loss": 0.1758,
"step": 566000
},
{
"epoch": 164.11,
"learning_rate": 8.97305909617613e-06,
"loss": 0.1599,
"step": 566500
},
{
"epoch": 164.25,
"learning_rate": 8.936848203939747e-06,
"loss": 0.1793,
"step": 567000
},
{
"epoch": 164.4,
"learning_rate": 8.900637311703361e-06,
"loss": 0.151,
"step": 567500
},
{
"epoch": 164.54,
"learning_rate": 8.864426419466976e-06,
"loss": 0.1545,
"step": 568000
},
{
"epoch": 164.69,
"learning_rate": 8.828215527230592e-06,
"loss": 0.1745,
"step": 568500
},
{
"epoch": 164.83,
"learning_rate": 8.792004634994206e-06,
"loss": 0.175,
"step": 569000
},
{
"epoch": 164.98,
"learning_rate": 8.755793742757822e-06,
"loss": 0.1685,
"step": 569500
},
{
"epoch": 165.12,
"learning_rate": 8.719582850521438e-06,
"loss": 0.1714,
"step": 570000
},
{
"epoch": 165.27,
"learning_rate": 8.683371958285053e-06,
"loss": 0.1596,
"step": 570500
},
{
"epoch": 165.41,
"learning_rate": 8.647161066048667e-06,
"loss": 0.1514,
"step": 571000
},
{
"epoch": 165.56,
"learning_rate": 8.610950173812283e-06,
"loss": 0.1698,
"step": 571500
},
{
"epoch": 165.7,
"learning_rate": 8.574739281575898e-06,
"loss": 0.1652,
"step": 572000
},
{
"epoch": 165.85,
"learning_rate": 8.538528389339514e-06,
"loss": 0.1615,
"step": 572500
},
{
"epoch": 165.99,
"learning_rate": 8.50231749710313e-06,
"loss": 0.1452,
"step": 573000
},
{
"epoch": 166.14,
"learning_rate": 8.466106604866744e-06,
"loss": 0.1636,
"step": 573500
},
{
"epoch": 166.28,
"learning_rate": 8.429895712630359e-06,
"loss": 0.165,
"step": 574000
},
{
"epoch": 166.43,
"learning_rate": 8.393684820393975e-06,
"loss": 0.1494,
"step": 574500
},
{
"epoch": 166.57,
"learning_rate": 8.35747392815759e-06,
"loss": 0.1576,
"step": 575000
},
{
"epoch": 166.71,
"learning_rate": 8.321263035921205e-06,
"loss": 0.1632,
"step": 575500
},
{
"epoch": 166.86,
"learning_rate": 8.285052143684821e-06,
"loss": 0.16,
"step": 576000
},
{
"epoch": 167.0,
"learning_rate": 8.248841251448436e-06,
"loss": 0.1682,
"step": 576500
},
{
"epoch": 167.15,
"learning_rate": 8.21263035921205e-06,
"loss": 0.1602,
"step": 577000
},
{
"epoch": 167.29,
"learning_rate": 8.176419466975666e-06,
"loss": 0.1463,
"step": 577500
},
{
"epoch": 167.44,
"learning_rate": 8.140208574739282e-06,
"loss": 0.1599,
"step": 578000
},
{
"epoch": 167.58,
"learning_rate": 8.103997682502898e-06,
"loss": 0.1446,
"step": 578500
},
{
"epoch": 167.73,
"learning_rate": 8.067786790266513e-06,
"loss": 0.1541,
"step": 579000
},
{
"epoch": 167.87,
"learning_rate": 8.031575898030127e-06,
"loss": 0.1572,
"step": 579500
},
{
"epoch": 168.02,
"learning_rate": 7.995365005793743e-06,
"loss": 0.1458,
"step": 580000
},
{
"epoch": 168.16,
"learning_rate": 7.95915411355736e-06,
"loss": 0.1426,
"step": 580500
},
{
"epoch": 168.31,
"learning_rate": 7.922943221320974e-06,
"loss": 0.1396,
"step": 581000
},
{
"epoch": 168.45,
"learning_rate": 7.88673232908459e-06,
"loss": 0.1583,
"step": 581500
},
{
"epoch": 168.6,
"learning_rate": 7.850521436848204e-06,
"loss": 0.1841,
"step": 582000
},
{
"epoch": 168.74,
"learning_rate": 7.814310544611819e-06,
"loss": 0.1544,
"step": 582500
},
{
"epoch": 168.89,
"learning_rate": 7.778099652375435e-06,
"loss": 0.167,
"step": 583000
},
{
"epoch": 169.03,
"learning_rate": 7.741888760139051e-06,
"loss": 0.1597,
"step": 583500
},
{
"epoch": 169.18,
"learning_rate": 7.705677867902665e-06,
"loss": 0.1523,
"step": 584000
},
{
"epoch": 169.32,
"learning_rate": 7.669466975666281e-06,
"loss": 0.1497,
"step": 584500
},
{
"epoch": 169.47,
"learning_rate": 7.633256083429896e-06,
"loss": 0.1541,
"step": 585000
},
{
"epoch": 169.61,
"learning_rate": 7.597045191193511e-06,
"loss": 0.1471,
"step": 585500
},
{
"epoch": 169.76,
"learning_rate": 7.560834298957127e-06,
"loss": 0.1812,
"step": 586000
},
{
"epoch": 169.9,
"learning_rate": 7.5246234067207415e-06,
"loss": 0.1556,
"step": 586500
},
{
"epoch": 170.05,
"learning_rate": 7.488412514484357e-06,
"loss": 0.1353,
"step": 587000
},
{
"epoch": 170.19,
"learning_rate": 7.452201622247973e-06,
"loss": 0.1334,
"step": 587500
},
{
"epoch": 170.34,
"learning_rate": 7.415990730011588e-06,
"loss": 0.1455,
"step": 588000
},
{
"epoch": 170.48,
"learning_rate": 7.3797798377752025e-06,
"loss": 0.1629,
"step": 588500
},
{
"epoch": 170.63,
"learning_rate": 7.343568945538819e-06,
"loss": 0.1388,
"step": 589000
},
{
"epoch": 170.77,
"learning_rate": 7.307358053302434e-06,
"loss": 0.159,
"step": 589500
},
{
"epoch": 170.92,
"learning_rate": 7.271147161066048e-06,
"loss": 0.157,
"step": 590000
},
{
"epoch": 171.06,
"learning_rate": 7.234936268829664e-06,
"loss": 0.1471,
"step": 590500
},
{
"epoch": 171.21,
"learning_rate": 7.19872537659328e-06,
"loss": 0.1879,
"step": 591000
},
{
"epoch": 171.35,
"learning_rate": 7.162514484356894e-06,
"loss": 0.1651,
"step": 591500
},
{
"epoch": 171.49,
"learning_rate": 7.12630359212051e-06,
"loss": 0.1657,
"step": 592000
},
{
"epoch": 171.64,
"learning_rate": 7.090092699884125e-06,
"loss": 0.1521,
"step": 592500
},
{
"epoch": 171.78,
"learning_rate": 7.0538818076477414e-06,
"loss": 0.1404,
"step": 593000
},
{
"epoch": 171.93,
"learning_rate": 7.017670915411356e-06,
"loss": 0.138,
"step": 593500
},
{
"epoch": 172.07,
"learning_rate": 6.981460023174971e-06,
"loss": 0.1545,
"step": 594000
},
{
"epoch": 172.22,
"learning_rate": 6.945249130938587e-06,
"loss": 0.1472,
"step": 594500
},
{
"epoch": 172.36,
"learning_rate": 6.9090382387022024e-06,
"loss": 0.1593,
"step": 595000
},
{
"epoch": 172.51,
"learning_rate": 6.872827346465817e-06,
"loss": 0.1461,
"step": 595500
},
{
"epoch": 172.65,
"learning_rate": 6.836616454229433e-06,
"loss": 0.1279,
"step": 596000
},
{
"epoch": 172.8,
"learning_rate": 6.800405561993048e-06,
"loss": 0.135,
"step": 596500
},
{
"epoch": 172.94,
"learning_rate": 6.764194669756663e-06,
"loss": 0.14,
"step": 597000
},
{
"epoch": 173.09,
"learning_rate": 6.727983777520279e-06,
"loss": 0.1307,
"step": 597500
},
{
"epoch": 173.23,
"learning_rate": 6.691772885283894e-06,
"loss": 0.1356,
"step": 598000
},
{
"epoch": 173.38,
"learning_rate": 6.655561993047508e-06,
"loss": 0.1466,
"step": 598500
},
{
"epoch": 173.52,
"learning_rate": 6.6193511008111244e-06,
"loss": 0.1361,
"step": 599000
},
{
"epoch": 173.67,
"learning_rate": 6.58314020857474e-06,
"loss": 0.1336,
"step": 599500
},
{
"epoch": 173.81,
"learning_rate": 6.546929316338354e-06,
"loss": 0.129,
"step": 600000
},
{
"epoch": 173.96,
"learning_rate": 6.51071842410197e-06,
"loss": 0.1637,
"step": 600500
},
{
"epoch": 174.1,
"learning_rate": 6.474507531865585e-06,
"loss": 0.1632,
"step": 601000
},
{
"epoch": 174.25,
"learning_rate": 6.4382966396292e-06,
"loss": 0.151,
"step": 601500
},
{
"epoch": 174.39,
"learning_rate": 6.402085747392817e-06,
"loss": 0.1487,
"step": 602000
},
{
"epoch": 174.54,
"learning_rate": 6.365874855156431e-06,
"loss": 0.1335,
"step": 602500
},
{
"epoch": 174.68,
"learning_rate": 6.329663962920046e-06,
"loss": 0.1338,
"step": 603000
},
{
"epoch": 174.83,
"learning_rate": 6.2934530706836625e-06,
"loss": 0.1461,
"step": 603500
},
{
"epoch": 174.97,
"learning_rate": 6.257242178447277e-06,
"loss": 0.1413,
"step": 604000
},
{
"epoch": 175.12,
"learning_rate": 6.221031286210892e-06,
"loss": 0.1545,
"step": 604500
},
{
"epoch": 175.26,
"learning_rate": 6.184820393974508e-06,
"loss": 0.1615,
"step": 605000
},
{
"epoch": 175.41,
"learning_rate": 6.148609501738123e-06,
"loss": 0.1325,
"step": 605500
},
{
"epoch": 175.55,
"learning_rate": 6.112398609501739e-06,
"loss": 0.1398,
"step": 606000
},
{
"epoch": 175.7,
"learning_rate": 6.076187717265354e-06,
"loss": 0.1413,
"step": 606500
},
{
"epoch": 175.84,
"learning_rate": 6.039976825028968e-06,
"loss": 0.159,
"step": 607000
},
{
"epoch": 175.98,
"learning_rate": 6.0037659327925845e-06,
"loss": 0.1591,
"step": 607500
},
{
"epoch": 176.13,
"learning_rate": 5.9675550405562e-06,
"loss": 0.141,
"step": 608000
},
{
"epoch": 176.27,
"learning_rate": 5.931344148319814e-06,
"loss": 0.1246,
"step": 608500
},
{
"epoch": 176.42,
"learning_rate": 5.89513325608343e-06,
"loss": 0.1274,
"step": 609000
},
{
"epoch": 176.56,
"learning_rate": 5.8589223638470455e-06,
"loss": 0.152,
"step": 609500
},
{
"epoch": 176.71,
"learning_rate": 5.822711471610661e-06,
"loss": 0.159,
"step": 610000
},
{
"epoch": 176.85,
"learning_rate": 5.786500579374276e-06,
"loss": 0.1337,
"step": 610500
},
{
"epoch": 177.0,
"learning_rate": 5.750289687137891e-06,
"loss": 0.1248,
"step": 611000
},
{
"epoch": 177.14,
"learning_rate": 5.7140787949015065e-06,
"loss": 0.1375,
"step": 611500
},
{
"epoch": 177.29,
"learning_rate": 5.677867902665122e-06,
"loss": 0.1386,
"step": 612000
},
{
"epoch": 177.43,
"learning_rate": 5.641657010428737e-06,
"loss": 0.1376,
"step": 612500
},
{
"epoch": 177.58,
"learning_rate": 5.605446118192353e-06,
"loss": 0.1288,
"step": 613000
},
{
"epoch": 177.72,
"learning_rate": 5.5692352259559675e-06,
"loss": 0.1624,
"step": 613500
},
{
"epoch": 177.87,
"learning_rate": 5.533024333719583e-06,
"loss": 0.1262,
"step": 614000
},
{
"epoch": 178.01,
"learning_rate": 5.496813441483199e-06,
"loss": 0.1688,
"step": 614500
},
{
"epoch": 178.16,
"learning_rate": 5.460602549246813e-06,
"loss": 0.1789,
"step": 615000
},
{
"epoch": 178.3,
"learning_rate": 5.4243916570104285e-06,
"loss": 0.1338,
"step": 615500
},
{
"epoch": 178.45,
"learning_rate": 5.3881807647740446e-06,
"loss": 0.14,
"step": 616000
},
{
"epoch": 178.59,
"learning_rate": 5.35196987253766e-06,
"loss": 0.1236,
"step": 616500
},
{
"epoch": 178.74,
"learning_rate": 5.315758980301275e-06,
"loss": 0.1449,
"step": 617000
},
{
"epoch": 178.88,
"learning_rate": 5.27954808806489e-06,
"loss": 0.129,
"step": 617500
},
{
"epoch": 179.03,
"learning_rate": 5.2433371958285056e-06,
"loss": 0.1373,
"step": 618000
},
{
"epoch": 179.17,
"learning_rate": 5.207126303592121e-06,
"loss": 0.121,
"step": 618500
},
{
"epoch": 179.32,
"learning_rate": 5.170915411355736e-06,
"loss": 0.1285,
"step": 619000
},
{
"epoch": 179.46,
"learning_rate": 5.134704519119351e-06,
"loss": 0.1462,
"step": 619500
},
{
"epoch": 179.61,
"learning_rate": 5.0984936268829666e-06,
"loss": 0.1212,
"step": 620000
},
{
"epoch": 179.75,
"learning_rate": 5.062282734646582e-06,
"loss": 0.138,
"step": 620500
},
{
"epoch": 179.9,
"learning_rate": 5.026071842410197e-06,
"loss": 0.1686,
"step": 621000
},
{
"epoch": 180.04,
"learning_rate": 4.989860950173812e-06,
"loss": 0.1319,
"step": 621500
},
{
"epoch": 180.19,
"learning_rate": 4.9536500579374276e-06,
"loss": 0.1327,
"step": 622000
},
{
"epoch": 180.33,
"learning_rate": 4.917439165701044e-06,
"loss": 0.1409,
"step": 622500
},
{
"epoch": 180.48,
"learning_rate": 4.881228273464659e-06,
"loss": 0.1282,
"step": 623000
},
{
"epoch": 180.62,
"learning_rate": 4.845017381228273e-06,
"loss": 0.1284,
"step": 623500
},
{
"epoch": 180.76,
"learning_rate": 4.808806488991889e-06,
"loss": 0.1314,
"step": 624000
},
{
"epoch": 180.91,
"learning_rate": 4.772595596755505e-06,
"loss": 0.1163,
"step": 624500
},
{
"epoch": 181.05,
"learning_rate": 4.736384704519119e-06,
"loss": 0.1055,
"step": 625000
},
{
"epoch": 181.2,
"learning_rate": 4.700173812282735e-06,
"loss": 0.1329,
"step": 625500
},
{
"epoch": 181.34,
"learning_rate": 4.66396292004635e-06,
"loss": 0.1317,
"step": 626000
},
{
"epoch": 181.49,
"learning_rate": 4.627752027809966e-06,
"loss": 0.1612,
"step": 626500
},
{
"epoch": 181.63,
"learning_rate": 4.591541135573581e-06,
"loss": 0.1269,
"step": 627000
},
{
"epoch": 181.78,
"learning_rate": 4.555330243337196e-06,
"loss": 0.1404,
"step": 627500
},
{
"epoch": 181.92,
"learning_rate": 4.519119351100811e-06,
"loss": 0.1434,
"step": 628000
},
{
"epoch": 182.07,
"learning_rate": 4.482908458864427e-06,
"loss": 0.1427,
"step": 628500
},
{
"epoch": 182.21,
"learning_rate": 4.446697566628042e-06,
"loss": 0.0988,
"step": 629000
},
{
"epoch": 182.36,
"learning_rate": 4.410486674391657e-06,
"loss": 0.1395,
"step": 629500
},
{
"epoch": 182.5,
"learning_rate": 4.374275782155272e-06,
"loss": 0.1365,
"step": 630000
},
{
"epoch": 182.65,
"learning_rate": 4.338064889918888e-06,
"loss": 0.1285,
"step": 630500
},
{
"epoch": 182.79,
"learning_rate": 4.301853997682504e-06,
"loss": 0.1275,
"step": 631000
},
{
"epoch": 182.94,
"learning_rate": 4.265643105446118e-06,
"loss": 0.1411,
"step": 631500
},
{
"epoch": 183.08,
"learning_rate": 4.229432213209733e-06,
"loss": 0.1175,
"step": 632000
},
{
"epoch": 183.23,
"learning_rate": 4.1932213209733495e-06,
"loss": 0.1253,
"step": 632500
},
{
"epoch": 183.37,
"learning_rate": 4.157010428736964e-06,
"loss": 0.1131,
"step": 633000
},
{
"epoch": 183.52,
"learning_rate": 4.12079953650058e-06,
"loss": 0.1263,
"step": 633500
},
{
"epoch": 183.66,
"learning_rate": 4.084588644264195e-06,
"loss": 0.1473,
"step": 634000
},
{
"epoch": 183.81,
"learning_rate": 4.04837775202781e-06,
"loss": 0.1214,
"step": 634500
},
{
"epoch": 183.95,
"learning_rate": 4.012166859791426e-06,
"loss": 0.1385,
"step": 635000
},
{
"epoch": 184.1,
"learning_rate": 3.975955967555041e-06,
"loss": 0.1243,
"step": 635500
},
{
"epoch": 184.24,
"learning_rate": 3.939745075318655e-06,
"loss": 0.1299,
"step": 636000
},
{
"epoch": 184.39,
"learning_rate": 3.9035341830822715e-06,
"loss": 0.1384,
"step": 636500
},
{
"epoch": 184.53,
"learning_rate": 3.867323290845887e-06,
"loss": 0.1267,
"step": 637000
},
{
"epoch": 184.68,
"learning_rate": 3.831112398609502e-06,
"loss": 0.1584,
"step": 637500
},
{
"epoch": 184.82,
"learning_rate": 3.794901506373117e-06,
"loss": 0.1168,
"step": 638000
},
{
"epoch": 184.97,
"learning_rate": 3.7586906141367325e-06,
"loss": 0.1163,
"step": 638500
},
{
"epoch": 185.11,
"learning_rate": 3.722479721900348e-06,
"loss": 0.1161,
"step": 639000
},
{
"epoch": 185.25,
"learning_rate": 3.686268829663963e-06,
"loss": 0.1268,
"step": 639500
},
{
"epoch": 185.4,
"learning_rate": 3.650057937427578e-06,
"loss": 0.1185,
"step": 640000
},
{
"epoch": 185.54,
"learning_rate": 3.613847045191194e-06,
"loss": 0.111,
"step": 640500
},
{
"epoch": 185.69,
"learning_rate": 3.5776361529548087e-06,
"loss": 0.1406,
"step": 641000
},
{
"epoch": 185.83,
"learning_rate": 3.5414252607184244e-06,
"loss": 0.1247,
"step": 641500
},
{
"epoch": 185.98,
"learning_rate": 3.5052143684820396e-06,
"loss": 0.1106,
"step": 642000
},
{
"epoch": 186.12,
"learning_rate": 3.4690034762456544e-06,
"loss": 0.1318,
"step": 642500
},
{
"epoch": 186.27,
"learning_rate": 3.43279258400927e-06,
"loss": 0.1082,
"step": 643000
},
{
"epoch": 186.41,
"learning_rate": 3.3965816917728854e-06,
"loss": 0.1264,
"step": 643500
},
{
"epoch": 186.56,
"learning_rate": 3.360370799536501e-06,
"loss": 0.1201,
"step": 644000
},
{
"epoch": 186.7,
"learning_rate": 3.324159907300116e-06,
"loss": 0.1267,
"step": 644500
},
{
"epoch": 186.85,
"learning_rate": 3.2879490150637315e-06,
"loss": 0.1309,
"step": 645000
},
{
"epoch": 186.99,
"learning_rate": 3.2517381228273468e-06,
"loss": 0.1151,
"step": 645500
},
{
"epoch": 187.14,
"learning_rate": 3.2155272305909616e-06,
"loss": 0.1188,
"step": 646000
},
{
"epoch": 187.28,
"learning_rate": 3.1793163383545773e-06,
"loss": 0.1265,
"step": 646500
},
{
"epoch": 187.43,
"learning_rate": 3.143105446118193e-06,
"loss": 0.119,
"step": 647000
},
{
"epoch": 187.57,
"learning_rate": 3.1068945538818078e-06,
"loss": 0.1231,
"step": 647500
},
{
"epoch": 187.72,
"learning_rate": 3.070683661645423e-06,
"loss": 0.1197,
"step": 648000
},
{
"epoch": 187.86,
"learning_rate": 3.0344727694090383e-06,
"loss": 0.1276,
"step": 648500
},
{
"epoch": 188.01,
"learning_rate": 2.998261877172654e-06,
"loss": 0.1248,
"step": 649000
},
{
"epoch": 188.15,
"learning_rate": 2.9620509849362688e-06,
"loss": 0.1389,
"step": 649500
},
{
"epoch": 188.3,
"learning_rate": 2.9258400926998844e-06,
"loss": 0.1192,
"step": 650000
},
{
"epoch": 188.44,
"learning_rate": 2.8896292004634997e-06,
"loss": 0.1129,
"step": 650500
},
{
"epoch": 188.59,
"learning_rate": 2.853418308227115e-06,
"loss": 0.1014,
"step": 651000
},
{
"epoch": 188.73,
"learning_rate": 2.81720741599073e-06,
"loss": 0.132,
"step": 651500
},
{
"epoch": 188.88,
"learning_rate": 2.7809965237543454e-06,
"loss": 0.1383,
"step": 652000
},
{
"epoch": 189.02,
"learning_rate": 2.744785631517961e-06,
"loss": 0.1387,
"step": 652500
},
{
"epoch": 189.17,
"learning_rate": 2.708574739281576e-06,
"loss": 0.1105,
"step": 653000
},
{
"epoch": 189.31,
"learning_rate": 2.672363847045191e-06,
"loss": 0.1212,
"step": 653500
},
{
"epoch": 189.46,
"learning_rate": 2.636152954808807e-06,
"loss": 0.1115,
"step": 654000
},
{
"epoch": 189.6,
"learning_rate": 2.599942062572422e-06,
"loss": 0.1256,
"step": 654500
},
{
"epoch": 189.75,
"learning_rate": 2.563731170336037e-06,
"loss": 0.1287,
"step": 655000
},
{
"epoch": 189.89,
"learning_rate": 2.5275202780996526e-06,
"loss": 0.117,
"step": 655500
},
{
"epoch": 190.03,
"learning_rate": 2.491309385863268e-06,
"loss": 0.1064,
"step": 656000
},
{
"epoch": 190.18,
"learning_rate": 2.455098493626883e-06,
"loss": 0.1338,
"step": 656500
},
{
"epoch": 190.32,
"learning_rate": 2.4188876013904983e-06,
"loss": 0.1168,
"step": 657000
},
{
"epoch": 190.47,
"learning_rate": 2.3826767091541136e-06,
"loss": 0.1258,
"step": 657500
},
{
"epoch": 190.61,
"learning_rate": 2.3464658169177293e-06,
"loss": 0.1021,
"step": 658000
},
{
"epoch": 190.76,
"learning_rate": 2.310254924681344e-06,
"loss": 0.1199,
"step": 658500
},
{
"epoch": 190.9,
"learning_rate": 2.2740440324449593e-06,
"loss": 0.1065,
"step": 659000
},
{
"epoch": 191.05,
"learning_rate": 2.237833140208575e-06,
"loss": 0.1153,
"step": 659500
},
{
"epoch": 191.19,
"learning_rate": 2.2016222479721903e-06,
"loss": 0.1176,
"step": 660000
},
{
"epoch": 191.34,
"learning_rate": 2.1654113557358055e-06,
"loss": 0.1151,
"step": 660500
},
{
"epoch": 191.48,
"learning_rate": 2.1292004634994208e-06,
"loss": 0.1117,
"step": 661000
},
{
"epoch": 191.63,
"learning_rate": 2.092989571263036e-06,
"loss": 0.1226,
"step": 661500
},
{
"epoch": 191.77,
"learning_rate": 2.0567786790266513e-06,
"loss": 0.0943,
"step": 662000
},
{
"epoch": 191.92,
"learning_rate": 2.0205677867902665e-06,
"loss": 0.1238,
"step": 662500
},
{
"epoch": 192.06,
"learning_rate": 1.984356894553882e-06,
"loss": 0.1108,
"step": 663000
},
{
"epoch": 192.21,
"learning_rate": 1.9481460023174974e-06,
"loss": 0.1135,
"step": 663500
},
{
"epoch": 192.35,
"learning_rate": 1.9119351100811122e-06,
"loss": 0.1023,
"step": 664000
},
{
"epoch": 192.5,
"learning_rate": 1.875724217844728e-06,
"loss": 0.107,
"step": 664500
},
{
"epoch": 192.64,
"learning_rate": 1.8395133256083432e-06,
"loss": 0.1089,
"step": 665000
},
{
"epoch": 192.79,
"learning_rate": 1.8033024333719582e-06,
"loss": 0.104,
"step": 665500
},
{
"epoch": 192.93,
"learning_rate": 1.7670915411355737e-06,
"loss": 0.1172,
"step": 666000
},
{
"epoch": 193.08,
"learning_rate": 1.730880648899189e-06,
"loss": 0.106,
"step": 666500
},
{
"epoch": 193.22,
"learning_rate": 1.6946697566628044e-06,
"loss": 0.0994,
"step": 667000
},
{
"epoch": 193.37,
"learning_rate": 1.6584588644264196e-06,
"loss": 0.105,
"step": 667500
},
{
"epoch": 193.51,
"learning_rate": 1.6222479721900347e-06,
"loss": 0.1141,
"step": 668000
},
{
"epoch": 193.66,
"learning_rate": 1.5860370799536503e-06,
"loss": 0.1127,
"step": 668500
},
{
"epoch": 193.8,
"learning_rate": 1.5498261877172654e-06,
"loss": 0.1112,
"step": 669000
},
{
"epoch": 193.95,
"learning_rate": 1.5136152954808808e-06,
"loss": 0.1333,
"step": 669500
},
{
"epoch": 194.09,
"learning_rate": 1.477404403244496e-06,
"loss": 0.1027,
"step": 670000
},
{
"epoch": 194.24,
"learning_rate": 1.4411935110081113e-06,
"loss": 0.1204,
"step": 670500
},
{
"epoch": 194.38,
"learning_rate": 1.4049826187717266e-06,
"loss": 0.116,
"step": 671000
},
{
"epoch": 194.52,
"learning_rate": 1.3687717265353418e-06,
"loss": 0.1152,
"step": 671500
},
{
"epoch": 194.67,
"learning_rate": 1.3325608342989573e-06,
"loss": 0.1211,
"step": 672000
},
{
"epoch": 194.81,
"learning_rate": 1.2963499420625725e-06,
"loss": 0.103,
"step": 672500
},
{
"epoch": 194.96,
"learning_rate": 1.2601390498261878e-06,
"loss": 0.1091,
"step": 673000
},
{
"epoch": 195.1,
"learning_rate": 1.223928157589803e-06,
"loss": 0.1153,
"step": 673500
},
{
"epoch": 195.25,
"learning_rate": 1.1877172653534185e-06,
"loss": 0.1098,
"step": 674000
},
{
"epoch": 195.39,
"learning_rate": 1.1515063731170337e-06,
"loss": 0.1263,
"step": 674500
},
{
"epoch": 195.54,
"learning_rate": 1.115295480880649e-06,
"loss": 0.1176,
"step": 675000
},
{
"epoch": 195.68,
"learning_rate": 1.0790845886442642e-06,
"loss": 0.1088,
"step": 675500
},
{
"epoch": 195.83,
"learning_rate": 1.0428736964078795e-06,
"loss": 0.1062,
"step": 676000
},
{
"epoch": 195.97,
"learning_rate": 1.006662804171495e-06,
"loss": 0.1102,
"step": 676500
},
{
"epoch": 196.12,
"learning_rate": 9.7045191193511e-07,
"loss": 0.1305,
"step": 677000
},
{
"epoch": 196.26,
"learning_rate": 9.342410196987254e-07,
"loss": 0.1213,
"step": 677500
},
{
"epoch": 196.41,
"learning_rate": 8.980301274623407e-07,
"loss": 0.1173,
"step": 678000
},
{
"epoch": 196.55,
"learning_rate": 8.61819235225956e-07,
"loss": 0.1198,
"step": 678500
},
{
"epoch": 196.7,
"learning_rate": 8.256083429895712e-07,
"loss": 0.1099,
"step": 679000
},
{
"epoch": 196.84,
"learning_rate": 7.893974507531865e-07,
"loss": 0.1051,
"step": 679500
},
{
"epoch": 196.99,
"learning_rate": 7.531865585168019e-07,
"loss": 0.11,
"step": 680000
},
{
"epoch": 197.13,
"learning_rate": 7.169756662804171e-07,
"loss": 0.1015,
"step": 680500
},
{
"epoch": 197.28,
"learning_rate": 6.807647740440325e-07,
"loss": 0.1107,
"step": 681000
},
{
"epoch": 197.42,
"learning_rate": 6.445538818076477e-07,
"loss": 0.0984,
"step": 681500
},
{
"epoch": 197.57,
"learning_rate": 6.083429895712631e-07,
"loss": 0.1129,
"step": 682000
},
{
"epoch": 197.71,
"learning_rate": 5.721320973348784e-07,
"loss": 0.1154,
"step": 682500
},
{
"epoch": 197.86,
"learning_rate": 5.359212050984936e-07,
"loss": 0.1049,
"step": 683000
},
{
"epoch": 198.0,
"learning_rate": 4.99710312862109e-07,
"loss": 0.1267,
"step": 683500
},
{
"epoch": 198.15,
"learning_rate": 4.6349942062572426e-07,
"loss": 0.1142,
"step": 684000
},
{
"epoch": 198.29,
"learning_rate": 4.2728852838933956e-07,
"loss": 0.0944,
"step": 684500
},
{
"epoch": 198.44,
"learning_rate": 3.910776361529548e-07,
"loss": 0.1224,
"step": 685000
},
{
"epoch": 198.58,
"learning_rate": 3.548667439165701e-07,
"loss": 0.1043,
"step": 685500
},
{
"epoch": 198.73,
"learning_rate": 3.186558516801854e-07,
"loss": 0.1193,
"step": 686000
},
{
"epoch": 198.87,
"learning_rate": 2.824449594438007e-07,
"loss": 0.1056,
"step": 686500
},
{
"epoch": 199.02,
"learning_rate": 2.4623406720741596e-07,
"loss": 0.1252,
"step": 687000
},
{
"epoch": 199.16,
"learning_rate": 2.1002317497103131e-07,
"loss": 0.1309,
"step": 687500
},
{
"epoch": 199.3,
"learning_rate": 1.738122827346466e-07,
"loss": 0.106,
"step": 688000
},
{
"epoch": 199.45,
"learning_rate": 1.376013904982619e-07,
"loss": 0.1152,
"step": 688500
},
{
"epoch": 199.59,
"learning_rate": 1.0139049826187717e-07,
"loss": 0.1014,
"step": 689000
},
{
"epoch": 199.74,
"learning_rate": 6.517960602549247e-08,
"loss": 0.1042,
"step": 689500
},
{
"epoch": 199.88,
"learning_rate": 2.8968713789107766e-08,
"loss": 0.1189,
"step": 690000
}
],
"max_steps": 690400,
"num_train_epochs": 200,
"total_flos": 3.3589830897477504e+16,
"trial_name": null,
"trial_params": null
}