klue-roberta-finetuned-korquad-v2 / trainer_state.json
uomnf97's picture
feat: upload model
62ca9eb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 39912,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.988975746642615e-06,
"loss": 3.3419,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 9.97644818600922e-06,
"loss": 2.1466,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 9.963920625375829e-06,
"loss": 1.7654,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 9.951393064742433e-06,
"loss": 1.4769,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 9.938865504109041e-06,
"loss": 1.6047,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 9.926337943475647e-06,
"loss": 1.581,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.913810382842254e-06,
"loss": 1.4987,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 9.901282822208862e-06,
"loss": 1.4837,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9.888755261575466e-06,
"loss": 1.4321,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.876227700942074e-06,
"loss": 1.4264,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.86370014030868e-06,
"loss": 1.3971,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 9.851172579675286e-06,
"loss": 1.442,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.838645019041893e-06,
"loss": 1.3419,
"step": 650
},
{
"epoch": 0.05,
"learning_rate": 9.826117458408499e-06,
"loss": 1.4171,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.813589897775107e-06,
"loss": 1.4661,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 9.801062337141713e-06,
"loss": 1.3163,
"step": 800
},
{
"epoch": 0.06,
"learning_rate": 9.78853477650832e-06,
"loss": 1.3701,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 9.776007215874925e-06,
"loss": 1.3991,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 9.764231308879536e-06,
"loss": 1.125,
"step": 950
},
{
"epoch": 0.08,
"learning_rate": 9.751703748246142e-06,
"loss": 0.4997,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 9.739176187612749e-06,
"loss": 0.504,
"step": 1050
},
{
"epoch": 0.08,
"learning_rate": 9.726648626979357e-06,
"loss": 0.4567,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 9.714121066345961e-06,
"loss": 0.4345,
"step": 1150
},
{
"epoch": 0.09,
"learning_rate": 9.701593505712569e-06,
"loss": 0.3392,
"step": 1200
},
{
"epoch": 0.09,
"learning_rate": 9.689065945079175e-06,
"loss": 0.4173,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 9.676538384445782e-06,
"loss": 0.4116,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 9.664010823812388e-06,
"loss": 0.4069,
"step": 1350
},
{
"epoch": 0.11,
"learning_rate": 9.651483263178994e-06,
"loss": 0.4034,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 9.638955702545602e-06,
"loss": 0.3894,
"step": 1450
},
{
"epoch": 0.11,
"learning_rate": 9.626428141912208e-06,
"loss": 0.4058,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 9.613900581278814e-06,
"loss": 0.3719,
"step": 1550
},
{
"epoch": 0.12,
"learning_rate": 9.60137302064542e-06,
"loss": 0.398,
"step": 1600
},
{
"epoch": 0.12,
"learning_rate": 9.588845460012027e-06,
"loss": 0.2995,
"step": 1650
},
{
"epoch": 0.13,
"learning_rate": 9.576317899378635e-06,
"loss": 0.3297,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 9.563790338745241e-06,
"loss": 0.3102,
"step": 1750
},
{
"epoch": 0.14,
"learning_rate": 9.551262778111847e-06,
"loss": 0.3882,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 9.538735217478453e-06,
"loss": 0.4243,
"step": 1850
},
{
"epoch": 0.14,
"learning_rate": 9.52620765684506e-06,
"loss": 0.4063,
"step": 1900
},
{
"epoch": 0.15,
"learning_rate": 9.513680096211666e-06,
"loss": 0.3378,
"step": 1950
},
{
"epoch": 0.15,
"learning_rate": 9.501152535578274e-06,
"loss": 0.444,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 9.48862497494488e-06,
"loss": 0.3442,
"step": 2050
},
{
"epoch": 0.16,
"learning_rate": 9.476097414311486e-06,
"loss": 0.3599,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 9.463569853678092e-06,
"loss": 0.4417,
"step": 2150
},
{
"epoch": 0.17,
"learning_rate": 9.451042293044699e-06,
"loss": 0.3398,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 9.438514732411306e-06,
"loss": 0.4124,
"step": 2250
},
{
"epoch": 0.17,
"learning_rate": 9.425987171777911e-06,
"loss": 0.3668,
"step": 2300
},
{
"epoch": 0.18,
"learning_rate": 9.413459611144519e-06,
"loss": 0.3484,
"step": 2350
},
{
"epoch": 0.18,
"learning_rate": 9.400932050511125e-06,
"loss": 0.3153,
"step": 2400
},
{
"epoch": 0.18,
"learning_rate": 9.388404489877731e-06,
"loss": 0.3886,
"step": 2450
},
{
"epoch": 0.19,
"learning_rate": 9.37587692924434e-06,
"loss": 0.3519,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 9.363349368610944e-06,
"loss": 0.2856,
"step": 2550
},
{
"epoch": 0.2,
"learning_rate": 9.350821807977552e-06,
"loss": 0.3399,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 9.338294247344158e-06,
"loss": 0.3406,
"step": 2650
},
{
"epoch": 0.2,
"learning_rate": 9.325766686710764e-06,
"loss": 0.3541,
"step": 2700
},
{
"epoch": 0.21,
"learning_rate": 9.313239126077372e-06,
"loss": 0.3573,
"step": 2750
},
{
"epoch": 0.21,
"learning_rate": 9.300711565443977e-06,
"loss": 0.3527,
"step": 2800
},
{
"epoch": 0.21,
"learning_rate": 9.288184004810585e-06,
"loss": 0.3678,
"step": 2850
},
{
"epoch": 0.22,
"learning_rate": 9.27565644417719e-06,
"loss": 0.3185,
"step": 2900
},
{
"epoch": 0.22,
"learning_rate": 9.263128883543797e-06,
"loss": 0.3216,
"step": 2950
},
{
"epoch": 0.23,
"learning_rate": 9.250601322910405e-06,
"loss": 0.3137,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 9.23807376227701e-06,
"loss": 0.3626,
"step": 3050
},
{
"epoch": 0.23,
"learning_rate": 9.225546201643617e-06,
"loss": 0.3488,
"step": 3100
},
{
"epoch": 0.24,
"learning_rate": 9.213018641010224e-06,
"loss": 0.3627,
"step": 3150
},
{
"epoch": 0.24,
"learning_rate": 9.20049108037683e-06,
"loss": 0.3051,
"step": 3200
},
{
"epoch": 0.24,
"learning_rate": 9.187963519743436e-06,
"loss": 0.3838,
"step": 3250
},
{
"epoch": 0.25,
"learning_rate": 9.175435959110042e-06,
"loss": 0.3439,
"step": 3300
},
{
"epoch": 0.25,
"learning_rate": 9.16290839847665e-06,
"loss": 0.352,
"step": 3350
},
{
"epoch": 0.26,
"learning_rate": 9.150380837843256e-06,
"loss": 0.3984,
"step": 3400
},
{
"epoch": 0.26,
"learning_rate": 9.137853277209863e-06,
"loss": 0.3368,
"step": 3450
},
{
"epoch": 0.26,
"learning_rate": 9.125325716576469e-06,
"loss": 0.3056,
"step": 3500
},
{
"epoch": 0.27,
"learning_rate": 9.112798155943075e-06,
"loss": 0.3133,
"step": 3550
},
{
"epoch": 0.27,
"learning_rate": 9.100270595309683e-06,
"loss": 0.3852,
"step": 3600
},
{
"epoch": 0.27,
"learning_rate": 9.08774303467629e-06,
"loss": 0.2698,
"step": 3650
},
{
"epoch": 0.28,
"learning_rate": 9.075215474042895e-06,
"loss": 0.398,
"step": 3700
},
{
"epoch": 0.28,
"learning_rate": 9.062687913409502e-06,
"loss": 0.308,
"step": 3750
},
{
"epoch": 0.29,
"learning_rate": 9.050160352776108e-06,
"loss": 0.3749,
"step": 3800
},
{
"epoch": 0.29,
"learning_rate": 9.037632792142714e-06,
"loss": 0.3018,
"step": 3850
},
{
"epoch": 0.29,
"learning_rate": 9.025105231509322e-06,
"loss": 0.3239,
"step": 3900
},
{
"epoch": 0.3,
"learning_rate": 9.012577670875928e-06,
"loss": 0.2677,
"step": 3950
},
{
"epoch": 0.3,
"learning_rate": 9.000050110242534e-06,
"loss": 0.2714,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 8.98752254960914e-06,
"loss": 0.2999,
"step": 4050
},
{
"epoch": 0.31,
"learning_rate": 8.974994988975747e-06,
"loss": 0.3023,
"step": 4100
},
{
"epoch": 0.31,
"learning_rate": 8.962467428342355e-06,
"loss": 0.3228,
"step": 4150
},
{
"epoch": 0.32,
"learning_rate": 8.94993986770896e-06,
"loss": 0.2647,
"step": 4200
},
{
"epoch": 0.32,
"learning_rate": 8.937412307075567e-06,
"loss": 0.3581,
"step": 4250
},
{
"epoch": 0.32,
"learning_rate": 8.924884746442173e-06,
"loss": 0.3389,
"step": 4300
},
{
"epoch": 0.33,
"learning_rate": 8.91235718580878e-06,
"loss": 0.3797,
"step": 4350
},
{
"epoch": 0.33,
"learning_rate": 8.899829625175388e-06,
"loss": 0.3023,
"step": 4400
},
{
"epoch": 0.33,
"learning_rate": 8.887302064541992e-06,
"loss": 0.2709,
"step": 4450
},
{
"epoch": 0.34,
"learning_rate": 8.8747745039086e-06,
"loss": 0.3533,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 8.862246943275206e-06,
"loss": 0.2852,
"step": 4550
},
{
"epoch": 0.35,
"learning_rate": 8.849719382641812e-06,
"loss": 0.3157,
"step": 4600
},
{
"epoch": 0.35,
"learning_rate": 8.83719182200842e-06,
"loss": 0.3187,
"step": 4650
},
{
"epoch": 0.35,
"learning_rate": 8.824664261375025e-06,
"loss": 0.318,
"step": 4700
},
{
"epoch": 0.36,
"learning_rate": 8.812136700741633e-06,
"loss": 0.3457,
"step": 4750
},
{
"epoch": 0.36,
"learning_rate": 8.799609140108239e-06,
"loss": 0.3347,
"step": 4800
},
{
"epoch": 0.36,
"learning_rate": 8.787081579474845e-06,
"loss": 0.2931,
"step": 4850
},
{
"epoch": 0.37,
"learning_rate": 8.774554018841453e-06,
"loss": 0.3019,
"step": 4900
},
{
"epoch": 0.37,
"learning_rate": 8.762026458208058e-06,
"loss": 0.2998,
"step": 4950
},
{
"epoch": 0.38,
"learning_rate": 8.749498897574666e-06,
"loss": 0.2842,
"step": 5000
},
{
"epoch": 0.38,
"learning_rate": 8.736971336941272e-06,
"loss": 0.3356,
"step": 5050
},
{
"epoch": 0.38,
"learning_rate": 8.724443776307878e-06,
"loss": 0.3036,
"step": 5100
},
{
"epoch": 0.39,
"learning_rate": 8.711916215674484e-06,
"loss": 0.2718,
"step": 5150
},
{
"epoch": 0.39,
"learning_rate": 8.69938865504109e-06,
"loss": 0.2997,
"step": 5200
},
{
"epoch": 0.39,
"learning_rate": 8.686861094407698e-06,
"loss": 0.3109,
"step": 5250
},
{
"epoch": 0.4,
"learning_rate": 8.674333533774305e-06,
"loss": 0.2624,
"step": 5300
},
{
"epoch": 0.4,
"learning_rate": 8.661805973140911e-06,
"loss": 0.319,
"step": 5350
},
{
"epoch": 0.41,
"learning_rate": 8.649278412507517e-06,
"loss": 0.2912,
"step": 5400
},
{
"epoch": 0.41,
"learning_rate": 8.636750851874123e-06,
"loss": 0.3112,
"step": 5450
},
{
"epoch": 0.41,
"learning_rate": 8.62422329124073e-06,
"loss": 0.3211,
"step": 5500
},
{
"epoch": 0.42,
"learning_rate": 8.611695730607337e-06,
"loss": 0.305,
"step": 5550
},
{
"epoch": 0.42,
"learning_rate": 8.599168169973944e-06,
"loss": 0.3497,
"step": 5600
},
{
"epoch": 0.42,
"learning_rate": 8.58664060934055e-06,
"loss": 0.3449,
"step": 5650
},
{
"epoch": 0.43,
"learning_rate": 8.574113048707156e-06,
"loss": 0.3943,
"step": 5700
},
{
"epoch": 0.43,
"learning_rate": 8.561585488073762e-06,
"loss": 0.2814,
"step": 5750
},
{
"epoch": 0.44,
"learning_rate": 8.54905792744037e-06,
"loss": 0.316,
"step": 5800
},
{
"epoch": 0.44,
"learning_rate": 8.536530366806977e-06,
"loss": 0.2893,
"step": 5850
},
{
"epoch": 0.44,
"learning_rate": 8.524002806173583e-06,
"loss": 0.3096,
"step": 5900
},
{
"epoch": 0.45,
"learning_rate": 8.511475245540189e-06,
"loss": 0.2938,
"step": 5950
},
{
"epoch": 0.45,
"learning_rate": 8.498947684906795e-06,
"loss": 0.2381,
"step": 6000
},
{
"epoch": 0.45,
"learning_rate": 8.486420124273403e-06,
"loss": 0.2592,
"step": 6050
},
{
"epoch": 0.46,
"learning_rate": 8.473892563640008e-06,
"loss": 0.3162,
"step": 6100
},
{
"epoch": 0.46,
"learning_rate": 8.461365003006616e-06,
"loss": 0.2905,
"step": 6150
},
{
"epoch": 0.47,
"learning_rate": 8.448837442373222e-06,
"loss": 0.2742,
"step": 6200
},
{
"epoch": 0.47,
"learning_rate": 8.436309881739828e-06,
"loss": 0.2994,
"step": 6250
},
{
"epoch": 0.47,
"learning_rate": 8.423782321106436e-06,
"loss": 0.2806,
"step": 6300
},
{
"epoch": 0.48,
"learning_rate": 8.41125476047304e-06,
"loss": 0.2868,
"step": 6350
},
{
"epoch": 0.48,
"learning_rate": 8.398727199839648e-06,
"loss": 0.3883,
"step": 6400
},
{
"epoch": 0.48,
"learning_rate": 8.386199639206255e-06,
"loss": 0.3111,
"step": 6450
},
{
"epoch": 0.49,
"learning_rate": 8.37367207857286e-06,
"loss": 0.2853,
"step": 6500
},
{
"epoch": 0.49,
"learning_rate": 8.361144517939469e-06,
"loss": 0.2741,
"step": 6550
},
{
"epoch": 0.5,
"learning_rate": 8.348616957306073e-06,
"loss": 0.2604,
"step": 6600
},
{
"epoch": 0.5,
"learning_rate": 8.336089396672681e-06,
"loss": 0.294,
"step": 6650
},
{
"epoch": 0.5,
"learning_rate": 8.323561836039287e-06,
"loss": 0.2728,
"step": 6700
},
{
"epoch": 0.51,
"learning_rate": 8.311034275405894e-06,
"loss": 0.2965,
"step": 6750
},
{
"epoch": 0.51,
"learning_rate": 8.298506714772502e-06,
"loss": 0.2178,
"step": 6800
},
{
"epoch": 0.51,
"learning_rate": 8.285979154139106e-06,
"loss": 0.283,
"step": 6850
},
{
"epoch": 0.52,
"learning_rate": 8.273451593505714e-06,
"loss": 0.2512,
"step": 6900
},
{
"epoch": 0.52,
"learning_rate": 8.26092403287232e-06,
"loss": 0.3288,
"step": 6950
},
{
"epoch": 0.53,
"learning_rate": 8.248396472238926e-06,
"loss": 0.3238,
"step": 7000
},
{
"epoch": 0.53,
"learning_rate": 8.235868911605533e-06,
"loss": 0.3111,
"step": 7050
},
{
"epoch": 0.53,
"learning_rate": 8.223341350972139e-06,
"loss": 0.2927,
"step": 7100
},
{
"epoch": 0.54,
"learning_rate": 8.210813790338747e-06,
"loss": 0.2656,
"step": 7150
},
{
"epoch": 0.54,
"learning_rate": 8.198286229705353e-06,
"loss": 0.2824,
"step": 7200
},
{
"epoch": 0.54,
"learning_rate": 8.18575866907196e-06,
"loss": 0.2994,
"step": 7250
},
{
"epoch": 0.55,
"learning_rate": 8.173231108438565e-06,
"loss": 0.3092,
"step": 7300
},
{
"epoch": 0.55,
"learning_rate": 8.160703547805172e-06,
"loss": 0.2535,
"step": 7350
},
{
"epoch": 0.56,
"learning_rate": 8.148175987171778e-06,
"loss": 0.254,
"step": 7400
},
{
"epoch": 0.56,
"learning_rate": 8.135648426538386e-06,
"loss": 0.2797,
"step": 7450
},
{
"epoch": 0.56,
"learning_rate": 8.123120865904992e-06,
"loss": 0.2635,
"step": 7500
},
{
"epoch": 0.57,
"learning_rate": 8.110593305271598e-06,
"loss": 0.271,
"step": 7550
},
{
"epoch": 0.57,
"learning_rate": 8.098065744638204e-06,
"loss": 0.3131,
"step": 7600
},
{
"epoch": 0.58,
"learning_rate": 8.08553818400481e-06,
"loss": 0.266,
"step": 7650
},
{
"epoch": 0.58,
"learning_rate": 8.073010623371419e-06,
"loss": 0.2765,
"step": 7700
},
{
"epoch": 0.58,
"learning_rate": 8.060483062738025e-06,
"loss": 0.3463,
"step": 7750
},
{
"epoch": 0.59,
"learning_rate": 8.047955502104631e-06,
"loss": 0.3163,
"step": 7800
},
{
"epoch": 0.59,
"learning_rate": 8.035427941471237e-06,
"loss": 0.3108,
"step": 7850
},
{
"epoch": 0.59,
"learning_rate": 8.022900380837843e-06,
"loss": 0.244,
"step": 7900
},
{
"epoch": 0.6,
"learning_rate": 8.010372820204451e-06,
"loss": 0.2798,
"step": 7950
},
{
"epoch": 0.6,
"learning_rate": 7.997845259571056e-06,
"loss": 0.3019,
"step": 8000
},
{
"epoch": 0.61,
"learning_rate": 7.985317698937664e-06,
"loss": 0.2541,
"step": 8050
},
{
"epoch": 0.61,
"learning_rate": 7.97279013830427e-06,
"loss": 0.303,
"step": 8100
},
{
"epoch": 0.61,
"learning_rate": 7.960262577670876e-06,
"loss": 0.2793,
"step": 8150
},
{
"epoch": 0.62,
"learning_rate": 7.947735017037484e-06,
"loss": 0.3295,
"step": 8200
},
{
"epoch": 0.62,
"learning_rate": 7.935207456404089e-06,
"loss": 0.2729,
"step": 8250
},
{
"epoch": 0.62,
"learning_rate": 7.922679895770697e-06,
"loss": 0.2907,
"step": 8300
},
{
"epoch": 0.63,
"learning_rate": 7.910152335137303e-06,
"loss": 0.2629,
"step": 8350
},
{
"epoch": 0.63,
"learning_rate": 7.897624774503909e-06,
"loss": 0.2181,
"step": 8400
},
{
"epoch": 0.64,
"learning_rate": 7.885097213870517e-06,
"loss": 0.3072,
"step": 8450
},
{
"epoch": 0.64,
"learning_rate": 7.872569653237122e-06,
"loss": 0.2183,
"step": 8500
},
{
"epoch": 0.64,
"learning_rate": 7.86004209260373e-06,
"loss": 0.2554,
"step": 8550
},
{
"epoch": 0.65,
"learning_rate": 7.847514531970336e-06,
"loss": 0.2644,
"step": 8600
},
{
"epoch": 0.65,
"learning_rate": 7.834986971336942e-06,
"loss": 0.2844,
"step": 8650
},
{
"epoch": 0.65,
"learning_rate": 7.82245941070355e-06,
"loss": 0.2938,
"step": 8700
},
{
"epoch": 0.66,
"learning_rate": 7.809931850070154e-06,
"loss": 0.2484,
"step": 8750
},
{
"epoch": 0.66,
"learning_rate": 7.797404289436762e-06,
"loss": 0.2555,
"step": 8800
},
{
"epoch": 0.67,
"learning_rate": 7.784876728803368e-06,
"loss": 0.2861,
"step": 8850
},
{
"epoch": 0.67,
"learning_rate": 7.772349168169975e-06,
"loss": 0.2805,
"step": 8900
},
{
"epoch": 0.67,
"learning_rate": 7.759821607536581e-06,
"loss": 0.2554,
"step": 8950
},
{
"epoch": 0.68,
"learning_rate": 7.747294046903187e-06,
"loss": 0.2374,
"step": 9000
},
{
"epoch": 0.68,
"learning_rate": 7.734766486269795e-06,
"loss": 0.2382,
"step": 9050
},
{
"epoch": 0.68,
"learning_rate": 7.722238925636401e-06,
"loss": 0.307,
"step": 9100
},
{
"epoch": 0.69,
"learning_rate": 7.709711365003007e-06,
"loss": 0.2953,
"step": 9150
},
{
"epoch": 0.69,
"learning_rate": 7.697183804369614e-06,
"loss": 0.312,
"step": 9200
},
{
"epoch": 0.7,
"learning_rate": 7.68465624373622e-06,
"loss": 0.3371,
"step": 9250
},
{
"epoch": 0.7,
"learning_rate": 7.672128683102826e-06,
"loss": 0.3123,
"step": 9300
},
{
"epoch": 0.7,
"learning_rate": 7.659601122469434e-06,
"loss": 0.2979,
"step": 9350
},
{
"epoch": 0.71,
"learning_rate": 7.64707356183604e-06,
"loss": 0.2655,
"step": 9400
},
{
"epoch": 0.71,
"learning_rate": 7.634546001202647e-06,
"loss": 0.293,
"step": 9450
},
{
"epoch": 0.71,
"learning_rate": 7.622018440569254e-06,
"loss": 0.2724,
"step": 9500
},
{
"epoch": 0.72,
"learning_rate": 7.609490879935859e-06,
"loss": 0.2549,
"step": 9550
},
{
"epoch": 0.72,
"learning_rate": 7.596963319302466e-06,
"loss": 0.2383,
"step": 9600
},
{
"epoch": 0.73,
"learning_rate": 7.584435758669073e-06,
"loss": 0.3682,
"step": 9650
},
{
"epoch": 0.73,
"learning_rate": 7.571908198035679e-06,
"loss": 0.3049,
"step": 9700
},
{
"epoch": 0.73,
"learning_rate": 7.559380637402286e-06,
"loss": 0.2522,
"step": 9750
},
{
"epoch": 0.74,
"learning_rate": 7.546853076768892e-06,
"loss": 0.3385,
"step": 9800
},
{
"epoch": 0.74,
"learning_rate": 7.534325516135499e-06,
"loss": 0.2306,
"step": 9850
},
{
"epoch": 0.74,
"learning_rate": 7.521797955502105e-06,
"loss": 0.2639,
"step": 9900
},
{
"epoch": 0.75,
"learning_rate": 7.509270394868712e-06,
"loss": 0.2643,
"step": 9950
},
{
"epoch": 0.75,
"learning_rate": 7.496742834235319e-06,
"loss": 0.2674,
"step": 10000
},
{
"epoch": 0.76,
"learning_rate": 7.4842152736019246e-06,
"loss": 0.2248,
"step": 10050
},
{
"epoch": 0.76,
"learning_rate": 7.471687712968532e-06,
"loss": 0.2558,
"step": 10100
},
{
"epoch": 0.76,
"learning_rate": 7.459160152335138e-06,
"loss": 0.2937,
"step": 10150
},
{
"epoch": 0.77,
"learning_rate": 7.446632591701745e-06,
"loss": 0.2297,
"step": 10200
},
{
"epoch": 0.77,
"learning_rate": 7.43410503106835e-06,
"loss": 0.2728,
"step": 10250
},
{
"epoch": 0.77,
"learning_rate": 7.421577470434957e-06,
"loss": 0.2819,
"step": 10300
},
{
"epoch": 0.78,
"learning_rate": 7.4090499098015644e-06,
"loss": 0.2745,
"step": 10350
},
{
"epoch": 0.78,
"learning_rate": 7.396522349168171e-06,
"loss": 0.2283,
"step": 10400
},
{
"epoch": 0.79,
"learning_rate": 7.383994788534778e-06,
"loss": 0.357,
"step": 10450
},
{
"epoch": 0.79,
"learning_rate": 7.371467227901383e-06,
"loss": 0.2901,
"step": 10500
},
{
"epoch": 0.79,
"learning_rate": 7.35893966726799e-06,
"loss": 0.2207,
"step": 10550
},
{
"epoch": 0.8,
"learning_rate": 7.346412106634597e-06,
"loss": 0.2898,
"step": 10600
},
{
"epoch": 0.8,
"learning_rate": 7.3338845460012035e-06,
"loss": 0.2482,
"step": 10650
},
{
"epoch": 0.8,
"learning_rate": 7.3213569853678105e-06,
"loss": 0.2333,
"step": 10700
},
{
"epoch": 0.81,
"learning_rate": 7.308829424734416e-06,
"loss": 0.2796,
"step": 10750
},
{
"epoch": 0.81,
"learning_rate": 7.296301864101023e-06,
"loss": 0.2594,
"step": 10800
},
{
"epoch": 0.82,
"learning_rate": 7.283774303467629e-06,
"loss": 0.2918,
"step": 10850
},
{
"epoch": 0.82,
"learning_rate": 7.271246742834236e-06,
"loss": 0.3055,
"step": 10900
},
{
"epoch": 0.82,
"learning_rate": 7.258719182200843e-06,
"loss": 0.2863,
"step": 10950
},
{
"epoch": 0.83,
"learning_rate": 7.246191621567449e-06,
"loss": 0.2995,
"step": 11000
},
{
"epoch": 0.83,
"learning_rate": 7.233664060934056e-06,
"loss": 0.2065,
"step": 11050
},
{
"epoch": 0.83,
"learning_rate": 7.221136500300662e-06,
"loss": 0.296,
"step": 11100
},
{
"epoch": 0.84,
"learning_rate": 7.208608939667269e-06,
"loss": 0.2704,
"step": 11150
},
{
"epoch": 0.84,
"learning_rate": 7.1960813790338745e-06,
"loss": 0.2284,
"step": 11200
},
{
"epoch": 0.85,
"learning_rate": 7.1835538184004815e-06,
"loss": 0.356,
"step": 11250
},
{
"epoch": 0.85,
"learning_rate": 7.171026257767089e-06,
"loss": 0.3024,
"step": 11300
},
{
"epoch": 0.85,
"learning_rate": 7.158498697133695e-06,
"loss": 0.2644,
"step": 11350
},
{
"epoch": 0.86,
"learning_rate": 7.145971136500302e-06,
"loss": 0.2668,
"step": 11400
},
{
"epoch": 0.86,
"learning_rate": 7.133443575866907e-06,
"loss": 0.2337,
"step": 11450
},
{
"epoch": 0.86,
"learning_rate": 7.120916015233514e-06,
"loss": 0.2522,
"step": 11500
},
{
"epoch": 0.87,
"learning_rate": 7.108388454600121e-06,
"loss": 0.2578,
"step": 11550
},
{
"epoch": 0.87,
"learning_rate": 7.095860893966728e-06,
"loss": 0.2453,
"step": 11600
},
{
"epoch": 0.88,
"learning_rate": 7.083333333333335e-06,
"loss": 0.2782,
"step": 11650
},
{
"epoch": 0.88,
"learning_rate": 7.07080577269994e-06,
"loss": 0.2345,
"step": 11700
},
{
"epoch": 0.88,
"learning_rate": 7.058278212066547e-06,
"loss": 0.2312,
"step": 11750
},
{
"epoch": 0.89,
"learning_rate": 7.045750651433153e-06,
"loss": 0.2479,
"step": 11800
},
{
"epoch": 0.89,
"learning_rate": 7.0332230907997604e-06,
"loss": 0.2239,
"step": 11850
},
{
"epoch": 0.89,
"learning_rate": 7.020695530166367e-06,
"loss": 0.2877,
"step": 11900
},
{
"epoch": 0.9,
"learning_rate": 7.008167969532973e-06,
"loss": 0.3235,
"step": 11950
},
{
"epoch": 0.9,
"learning_rate": 6.99564040889958e-06,
"loss": 0.2194,
"step": 12000
},
{
"epoch": 0.91,
"learning_rate": 6.983112848266186e-06,
"loss": 0.233,
"step": 12050
},
{
"epoch": 0.91,
"learning_rate": 6.970585287632793e-06,
"loss": 0.3014,
"step": 12100
},
{
"epoch": 0.91,
"learning_rate": 6.958057726999399e-06,
"loss": 0.2789,
"step": 12150
},
{
"epoch": 0.92,
"learning_rate": 6.945530166366006e-06,
"loss": 0.2465,
"step": 12200
},
{
"epoch": 0.92,
"learning_rate": 6.933002605732613e-06,
"loss": 0.2327,
"step": 12250
},
{
"epoch": 0.92,
"learning_rate": 6.920475045099219e-06,
"loss": 0.2878,
"step": 12300
},
{
"epoch": 0.93,
"learning_rate": 6.907947484465826e-06,
"loss": 0.3226,
"step": 12350
},
{
"epoch": 0.93,
"learning_rate": 6.895419923832431e-06,
"loss": 0.2277,
"step": 12400
},
{
"epoch": 0.94,
"learning_rate": 6.8828923631990385e-06,
"loss": 0.2458,
"step": 12450
},
{
"epoch": 0.94,
"learning_rate": 6.8703648025656455e-06,
"loss": 0.2458,
"step": 12500
},
{
"epoch": 0.94,
"learning_rate": 6.857837241932252e-06,
"loss": 0.2474,
"step": 12550
},
{
"epoch": 0.95,
"learning_rate": 6.845560232511526e-06,
"loss": 0.2596,
"step": 12600
},
{
"epoch": 0.95,
"learning_rate": 6.833032671878133e-06,
"loss": 0.2835,
"step": 12650
},
{
"epoch": 0.95,
"learning_rate": 6.820505111244738e-06,
"loss": 0.2835,
"step": 12700
},
{
"epoch": 0.96,
"learning_rate": 6.807977550611345e-06,
"loss": 0.3121,
"step": 12750
},
{
"epoch": 0.96,
"learning_rate": 6.795449989977952e-06,
"loss": 0.2801,
"step": 12800
},
{
"epoch": 0.97,
"learning_rate": 6.7829224293445584e-06,
"loss": 0.2506,
"step": 12850
},
{
"epoch": 0.97,
"learning_rate": 6.7703948687111655e-06,
"loss": 0.2275,
"step": 12900
},
{
"epoch": 0.97,
"learning_rate": 6.757867308077771e-06,
"loss": 0.2385,
"step": 12950
},
{
"epoch": 0.98,
"learning_rate": 6.745339747444378e-06,
"loss": 0.279,
"step": 13000
},
{
"epoch": 0.98,
"learning_rate": 6.732812186810985e-06,
"loss": 0.2528,
"step": 13050
},
{
"epoch": 0.98,
"learning_rate": 6.720284626177591e-06,
"loss": 0.283,
"step": 13100
},
{
"epoch": 0.99,
"learning_rate": 6.707757065544198e-06,
"loss": 0.3245,
"step": 13150
},
{
"epoch": 0.99,
"learning_rate": 6.695229504910804e-06,
"loss": 0.1986,
"step": 13200
},
{
"epoch": 1.0,
"learning_rate": 6.682701944277411e-06,
"loss": 0.2088,
"step": 13250
},
{
"epoch": 1.0,
"learning_rate": 6.670174383644017e-06,
"loss": 0.2689,
"step": 13300
},
{
"epoch": 1.0,
"exact_match": 50.67703831748776,
"f1": 58.73053658167741,
"step": 13304
},
{
"epoch": 1.0,
"learning_rate": 6.657646823010624e-06,
"loss": 0.1773,
"step": 13350
},
{
"epoch": 1.01,
"learning_rate": 6.645119262377231e-06,
"loss": 0.1641,
"step": 13400
},
{
"epoch": 1.01,
"learning_rate": 6.6325917017438365e-06,
"loss": 0.1244,
"step": 13450
},
{
"epoch": 1.01,
"learning_rate": 6.6200641411104436e-06,
"loss": 0.1949,
"step": 13500
},
{
"epoch": 1.02,
"learning_rate": 6.60753658047705e-06,
"loss": 0.1713,
"step": 13550
},
{
"epoch": 1.02,
"learning_rate": 6.595009019843657e-06,
"loss": 0.1529,
"step": 13600
},
{
"epoch": 1.03,
"learning_rate": 6.582481459210262e-06,
"loss": 0.1954,
"step": 13650
},
{
"epoch": 1.03,
"learning_rate": 6.569953898576869e-06,
"loss": 0.1638,
"step": 13700
},
{
"epoch": 1.03,
"learning_rate": 6.557426337943476e-06,
"loss": 0.1552,
"step": 13750
},
{
"epoch": 1.04,
"learning_rate": 6.544898777310083e-06,
"loss": 0.1747,
"step": 13800
},
{
"epoch": 1.04,
"learning_rate": 6.53237121667669e-06,
"loss": 0.1706,
"step": 13850
},
{
"epoch": 1.04,
"learning_rate": 6.519843656043295e-06,
"loss": 0.1998,
"step": 13900
},
{
"epoch": 1.05,
"learning_rate": 6.507316095409902e-06,
"loss": 0.1637,
"step": 13950
},
{
"epoch": 1.05,
"learning_rate": 6.494788534776509e-06,
"loss": 0.1617,
"step": 14000
},
{
"epoch": 1.06,
"learning_rate": 6.482260974143115e-06,
"loss": 0.1525,
"step": 14050
},
{
"epoch": 1.06,
"learning_rate": 6.4697334135097225e-06,
"loss": 0.1929,
"step": 14100
},
{
"epoch": 1.06,
"learning_rate": 6.457205852876328e-06,
"loss": 0.2203,
"step": 14150
},
{
"epoch": 1.07,
"learning_rate": 6.444678292242935e-06,
"loss": 0.1827,
"step": 14200
},
{
"epoch": 1.07,
"learning_rate": 6.432150731609541e-06,
"loss": 0.1955,
"step": 14250
},
{
"epoch": 1.07,
"learning_rate": 6.419623170976148e-06,
"loss": 0.1926,
"step": 14300
},
{
"epoch": 1.08,
"learning_rate": 6.407095610342755e-06,
"loss": 0.1723,
"step": 14350
},
{
"epoch": 1.08,
"learning_rate": 6.394568049709361e-06,
"loss": 0.1696,
"step": 14400
},
{
"epoch": 1.09,
"learning_rate": 6.382040489075968e-06,
"loss": 0.1261,
"step": 14450
},
{
"epoch": 1.09,
"learning_rate": 6.369512928442574e-06,
"loss": 0.2407,
"step": 14500
},
{
"epoch": 1.09,
"learning_rate": 6.356985367809181e-06,
"loss": 0.2039,
"step": 14550
},
{
"epoch": 1.1,
"learning_rate": 6.344457807175786e-06,
"loss": 0.1747,
"step": 14600
},
{
"epoch": 1.1,
"learning_rate": 6.3319302465423934e-06,
"loss": 0.1654,
"step": 14650
},
{
"epoch": 1.1,
"learning_rate": 6.3194026859090005e-06,
"loss": 0.1957,
"step": 14700
},
{
"epoch": 1.11,
"learning_rate": 6.306875125275607e-06,
"loss": 0.1789,
"step": 14750
},
{
"epoch": 1.11,
"learning_rate": 6.294347564642214e-06,
"loss": 0.1771,
"step": 14800
},
{
"epoch": 1.12,
"learning_rate": 6.281820004008819e-06,
"loss": 0.1725,
"step": 14850
},
{
"epoch": 1.12,
"learning_rate": 6.269292443375426e-06,
"loss": 0.1903,
"step": 14900
},
{
"epoch": 1.12,
"learning_rate": 6.256764882742033e-06,
"loss": 0.1681,
"step": 14950
},
{
"epoch": 1.13,
"learning_rate": 6.2442373221086395e-06,
"loss": 0.1824,
"step": 15000
},
{
"epoch": 1.13,
"learning_rate": 6.231709761475247e-06,
"loss": 0.1126,
"step": 15050
},
{
"epoch": 1.13,
"learning_rate": 6.219182200841852e-06,
"loss": 0.1662,
"step": 15100
},
{
"epoch": 1.14,
"learning_rate": 6.206654640208459e-06,
"loss": 0.2035,
"step": 15150
},
{
"epoch": 1.14,
"learning_rate": 6.194127079575065e-06,
"loss": 0.1658,
"step": 15200
},
{
"epoch": 1.15,
"learning_rate": 6.181599518941672e-06,
"loss": 0.1869,
"step": 15250
},
{
"epoch": 1.15,
"learning_rate": 6.169071958308279e-06,
"loss": 0.1606,
"step": 15300
},
{
"epoch": 1.15,
"learning_rate": 6.156544397674885e-06,
"loss": 0.1854,
"step": 15350
},
{
"epoch": 1.16,
"learning_rate": 6.144016837041492e-06,
"loss": 0.1845,
"step": 15400
},
{
"epoch": 1.16,
"learning_rate": 6.131489276408098e-06,
"loss": 0.1742,
"step": 15450
},
{
"epoch": 1.17,
"learning_rate": 6.118961715774705e-06,
"loss": 0.1677,
"step": 15500
},
{
"epoch": 1.17,
"learning_rate": 6.1064341551413105e-06,
"loss": 0.1238,
"step": 15550
},
{
"epoch": 1.17,
"learning_rate": 6.093906594507918e-06,
"loss": 0.224,
"step": 15600
},
{
"epoch": 1.18,
"learning_rate": 6.081379033874525e-06,
"loss": 0.2019,
"step": 15650
},
{
"epoch": 1.18,
"learning_rate": 6.068851473241131e-06,
"loss": 0.1631,
"step": 15700
},
{
"epoch": 1.18,
"learning_rate": 6.056323912607738e-06,
"loss": 0.2136,
"step": 15750
},
{
"epoch": 1.19,
"learning_rate": 6.043796351974343e-06,
"loss": 0.1555,
"step": 15800
},
{
"epoch": 1.19,
"learning_rate": 6.03126879134095e-06,
"loss": 0.1692,
"step": 15850
},
{
"epoch": 1.2,
"learning_rate": 6.0187412307075575e-06,
"loss": 0.1336,
"step": 15900
},
{
"epoch": 1.2,
"learning_rate": 6.006213670074164e-06,
"loss": 0.1642,
"step": 15950
},
{
"epoch": 1.2,
"learning_rate": 5.993686109440771e-06,
"loss": 0.2149,
"step": 16000
},
{
"epoch": 1.21,
"learning_rate": 5.981158548807376e-06,
"loss": 0.1683,
"step": 16050
},
{
"epoch": 1.21,
"learning_rate": 5.968630988173983e-06,
"loss": 0.1604,
"step": 16100
},
{
"epoch": 1.21,
"learning_rate": 5.956103427540589e-06,
"loss": 0.1808,
"step": 16150
},
{
"epoch": 1.22,
"learning_rate": 5.9435758669071965e-06,
"loss": 0.169,
"step": 16200
},
{
"epoch": 1.22,
"learning_rate": 5.9310483062738036e-06,
"loss": 0.195,
"step": 16250
},
{
"epoch": 1.23,
"learning_rate": 5.918520745640409e-06,
"loss": 0.1336,
"step": 16300
},
{
"epoch": 1.23,
"learning_rate": 5.905993185007016e-06,
"loss": 0.1956,
"step": 16350
},
{
"epoch": 1.23,
"learning_rate": 5.893465624373622e-06,
"loss": 0.1569,
"step": 16400
},
{
"epoch": 1.24,
"learning_rate": 5.880938063740229e-06,
"loss": 0.1466,
"step": 16450
},
{
"epoch": 1.24,
"learning_rate": 5.868410503106835e-06,
"loss": 0.1757,
"step": 16500
},
{
"epoch": 1.24,
"learning_rate": 5.855882942473442e-06,
"loss": 0.1571,
"step": 16550
},
{
"epoch": 1.25,
"learning_rate": 5.843355381840049e-06,
"loss": 0.2717,
"step": 16600
},
{
"epoch": 1.25,
"learning_rate": 5.830827821206655e-06,
"loss": 0.1132,
"step": 16650
},
{
"epoch": 1.26,
"learning_rate": 5.818300260573262e-06,
"loss": 0.1617,
"step": 16700
},
{
"epoch": 1.26,
"learning_rate": 5.8057726999398675e-06,
"loss": 0.2091,
"step": 16750
},
{
"epoch": 1.26,
"learning_rate": 5.7932451393064745e-06,
"loss": 0.1631,
"step": 16800
},
{
"epoch": 1.27,
"learning_rate": 5.780717578673081e-06,
"loss": 0.1768,
"step": 16850
},
{
"epoch": 1.27,
"learning_rate": 5.768190018039688e-06,
"loss": 0.1947,
"step": 16900
},
{
"epoch": 1.27,
"learning_rate": 5.755662457406295e-06,
"loss": 0.173,
"step": 16950
},
{
"epoch": 1.28,
"learning_rate": 5.7431348967729e-06,
"loss": 0.2037,
"step": 17000
},
{
"epoch": 1.28,
"learning_rate": 5.730607336139507e-06,
"loss": 0.2082,
"step": 17050
},
{
"epoch": 1.29,
"learning_rate": 5.7180797755061136e-06,
"loss": 0.162,
"step": 17100
},
{
"epoch": 1.29,
"learning_rate": 5.705552214872721e-06,
"loss": 0.1825,
"step": 17150
},
{
"epoch": 1.29,
"learning_rate": 5.693024654239328e-06,
"loss": 0.1638,
"step": 17200
},
{
"epoch": 1.3,
"learning_rate": 5.680497093605933e-06,
"loss": 0.1579,
"step": 17250
},
{
"epoch": 1.3,
"learning_rate": 5.66796953297254e-06,
"loss": 0.124,
"step": 17300
},
{
"epoch": 1.3,
"learning_rate": 5.655441972339146e-06,
"loss": 0.1596,
"step": 17350
},
{
"epoch": 1.31,
"learning_rate": 5.6429144117057534e-06,
"loss": 0.1544,
"step": 17400
},
{
"epoch": 1.31,
"learning_rate": 5.630386851072359e-06,
"loss": 0.1865,
"step": 17450
},
{
"epoch": 1.32,
"learning_rate": 5.617859290438966e-06,
"loss": 0.1335,
"step": 17500
},
{
"epoch": 1.32,
"learning_rate": 5.605331729805573e-06,
"loss": 0.1816,
"step": 17550
},
{
"epoch": 1.32,
"learning_rate": 5.592804169172179e-06,
"loss": 0.1921,
"step": 17600
},
{
"epoch": 1.33,
"learning_rate": 5.580276608538786e-06,
"loss": 0.1669,
"step": 17650
},
{
"epoch": 1.33,
"learning_rate": 5.567749047905392e-06,
"loss": 0.1636,
"step": 17700
},
{
"epoch": 1.33,
"learning_rate": 5.555221487271999e-06,
"loss": 0.2091,
"step": 17750
},
{
"epoch": 1.34,
"learning_rate": 5.542693926638605e-06,
"loss": 0.1951,
"step": 17800
},
{
"epoch": 1.34,
"learning_rate": 5.530166366005212e-06,
"loss": 0.2197,
"step": 17850
},
{
"epoch": 1.35,
"learning_rate": 5.517638805371819e-06,
"loss": 0.1353,
"step": 17900
},
{
"epoch": 1.35,
"learning_rate": 5.5051112447384244e-06,
"loss": 0.1587,
"step": 17950
},
{
"epoch": 1.35,
"learning_rate": 5.4925836841050315e-06,
"loss": 0.1769,
"step": 18000
},
{
"epoch": 1.36,
"learning_rate": 5.480056123471638e-06,
"loss": 0.1714,
"step": 18050
},
{
"epoch": 1.36,
"learning_rate": 5.467528562838245e-06,
"loss": 0.1517,
"step": 18100
},
{
"epoch": 1.36,
"learning_rate": 5.455001002204852e-06,
"loss": 0.2699,
"step": 18150
},
{
"epoch": 1.37,
"learning_rate": 5.442473441571457e-06,
"loss": 0.1771,
"step": 18200
},
{
"epoch": 1.37,
"learning_rate": 5.429945880938064e-06,
"loss": 0.1817,
"step": 18250
},
{
"epoch": 1.38,
"learning_rate": 5.4174183203046705e-06,
"loss": 0.1863,
"step": 18300
},
{
"epoch": 1.38,
"learning_rate": 5.404890759671278e-06,
"loss": 0.1275,
"step": 18350
},
{
"epoch": 1.38,
"learning_rate": 5.392363199037883e-06,
"loss": 0.1511,
"step": 18400
},
{
"epoch": 1.39,
"learning_rate": 5.37983563840449e-06,
"loss": 0.1792,
"step": 18450
},
{
"epoch": 1.39,
"learning_rate": 5.367308077771097e-06,
"loss": 0.2001,
"step": 18500
},
{
"epoch": 1.39,
"learning_rate": 5.354780517137703e-06,
"loss": 0.1179,
"step": 18550
},
{
"epoch": 1.4,
"learning_rate": 5.34225295650431e-06,
"loss": 0.2148,
"step": 18600
},
{
"epoch": 1.4,
"learning_rate": 5.329725395870916e-06,
"loss": 0.1888,
"step": 18650
},
{
"epoch": 1.41,
"learning_rate": 5.317197835237523e-06,
"loss": 0.1352,
"step": 18700
},
{
"epoch": 1.41,
"learning_rate": 5.304670274604129e-06,
"loss": 0.1844,
"step": 18750
},
{
"epoch": 1.41,
"learning_rate": 5.292142713970736e-06,
"loss": 0.1412,
"step": 18800
},
{
"epoch": 1.42,
"learning_rate": 5.279615153337343e-06,
"loss": 0.127,
"step": 18850
},
{
"epoch": 1.42,
"learning_rate": 5.2670875927039486e-06,
"loss": 0.2379,
"step": 18900
},
{
"epoch": 1.42,
"learning_rate": 5.254560032070556e-06,
"loss": 0.1905,
"step": 18950
},
{
"epoch": 1.43,
"learning_rate": 5.242032471437162e-06,
"loss": 0.1319,
"step": 19000
},
{
"epoch": 1.43,
"learning_rate": 5.229755462016437e-06,
"loss": 0.2084,
"step": 19050
},
{
"epoch": 1.44,
"learning_rate": 5.217227901383043e-06,
"loss": 0.1748,
"step": 19100
},
{
"epoch": 1.44,
"learning_rate": 5.20470034074965e-06,
"loss": 0.1661,
"step": 19150
},
{
"epoch": 1.44,
"learning_rate": 5.192172780116256e-06,
"loss": 0.1644,
"step": 19200
},
{
"epoch": 1.45,
"learning_rate": 5.179645219482863e-06,
"loss": 0.1735,
"step": 19250
},
{
"epoch": 1.45,
"learning_rate": 5.16711765884947e-06,
"loss": 0.1325,
"step": 19300
},
{
"epoch": 1.45,
"learning_rate": 5.154590098216076e-06,
"loss": 0.183,
"step": 19350
},
{
"epoch": 1.46,
"learning_rate": 5.142062537582683e-06,
"loss": 0.134,
"step": 19400
},
{
"epoch": 1.46,
"learning_rate": 5.129534976949289e-06,
"loss": 0.1634,
"step": 19450
},
{
"epoch": 1.47,
"learning_rate": 5.117007416315896e-06,
"loss": 0.1737,
"step": 19500
},
{
"epoch": 1.47,
"learning_rate": 5.104479855682501e-06,
"loss": 0.1713,
"step": 19550
},
{
"epoch": 1.47,
"learning_rate": 5.091952295049108e-06,
"loss": 0.1965,
"step": 19600
},
{
"epoch": 1.48,
"learning_rate": 5.0794247344157155e-06,
"loss": 0.1532,
"step": 19650
},
{
"epoch": 1.48,
"learning_rate": 5.066897173782322e-06,
"loss": 0.1611,
"step": 19700
},
{
"epoch": 1.48,
"learning_rate": 5.054369613148929e-06,
"loss": 0.17,
"step": 19750
},
{
"epoch": 1.49,
"learning_rate": 5.041842052515534e-06,
"loss": 0.1967,
"step": 19800
},
{
"epoch": 1.49,
"learning_rate": 5.029314491882141e-06,
"loss": 0.1759,
"step": 19850
},
{
"epoch": 1.5,
"learning_rate": 5.0167869312487474e-06,
"loss": 0.1987,
"step": 19900
},
{
"epoch": 1.5,
"learning_rate": 5.0042593706153545e-06,
"loss": 0.196,
"step": 19950
},
{
"epoch": 1.5,
"learning_rate": 4.991731809981961e-06,
"loss": 0.1686,
"step": 20000
},
{
"epoch": 1.51,
"learning_rate": 4.979204249348567e-06,
"loss": 0.1594,
"step": 20050
},
{
"epoch": 1.51,
"learning_rate": 4.966676688715174e-06,
"loss": 0.2131,
"step": 20100
},
{
"epoch": 1.51,
"learning_rate": 4.95414912808178e-06,
"loss": 0.1629,
"step": 20150
},
{
"epoch": 1.52,
"learning_rate": 4.941621567448387e-06,
"loss": 0.1937,
"step": 20200
},
{
"epoch": 1.52,
"learning_rate": 4.9290940068149935e-06,
"loss": 0.1514,
"step": 20250
},
{
"epoch": 1.53,
"learning_rate": 4.9165664461816e-06,
"loss": 0.2125,
"step": 20300
},
{
"epoch": 1.53,
"learning_rate": 4.904038885548206e-06,
"loss": 0.1556,
"step": 20350
},
{
"epoch": 1.53,
"learning_rate": 4.891511324914813e-06,
"loss": 0.2029,
"step": 20400
},
{
"epoch": 1.54,
"learning_rate": 4.87898376428142e-06,
"loss": 0.1697,
"step": 20450
},
{
"epoch": 1.54,
"learning_rate": 4.866456203648026e-06,
"loss": 0.1713,
"step": 20500
},
{
"epoch": 1.54,
"learning_rate": 4.8539286430146325e-06,
"loss": 0.2214,
"step": 20550
},
{
"epoch": 1.55,
"learning_rate": 4.841401082381239e-06,
"loss": 0.187,
"step": 20600
},
{
"epoch": 1.55,
"learning_rate": 4.828873521747846e-06,
"loss": 0.184,
"step": 20650
},
{
"epoch": 1.56,
"learning_rate": 4.816345961114452e-06,
"loss": 0.1558,
"step": 20700
},
{
"epoch": 1.56,
"learning_rate": 4.803818400481059e-06,
"loss": 0.1726,
"step": 20750
},
{
"epoch": 1.56,
"learning_rate": 4.791290839847665e-06,
"loss": 0.1698,
"step": 20800
},
{
"epoch": 1.57,
"learning_rate": 4.7787632792142716e-06,
"loss": 0.1554,
"step": 20850
},
{
"epoch": 1.57,
"learning_rate": 4.766235718580879e-06,
"loss": 0.1194,
"step": 20900
},
{
"epoch": 1.57,
"learning_rate": 4.753708157947485e-06,
"loss": 0.1769,
"step": 20950
},
{
"epoch": 1.58,
"learning_rate": 4.741180597314091e-06,
"loss": 0.1527,
"step": 21000
},
{
"epoch": 1.58,
"learning_rate": 4.728653036680698e-06,
"loss": 0.1611,
"step": 21050
},
{
"epoch": 1.59,
"learning_rate": 4.716125476047304e-06,
"loss": 0.1656,
"step": 21100
},
{
"epoch": 1.59,
"learning_rate": 4.7035979154139114e-06,
"loss": 0.1757,
"step": 21150
},
{
"epoch": 1.59,
"learning_rate": 4.691070354780518e-06,
"loss": 0.1548,
"step": 21200
},
{
"epoch": 1.6,
"learning_rate": 4.678542794147124e-06,
"loss": 0.1966,
"step": 21250
},
{
"epoch": 1.6,
"learning_rate": 4.66601523351373e-06,
"loss": 0.2066,
"step": 21300
},
{
"epoch": 1.6,
"learning_rate": 4.653487672880337e-06,
"loss": 0.1643,
"step": 21350
},
{
"epoch": 1.61,
"learning_rate": 4.640960112246944e-06,
"loss": 0.1381,
"step": 21400
},
{
"epoch": 1.61,
"learning_rate": 4.6284325516135505e-06,
"loss": 0.192,
"step": 21450
},
{
"epoch": 1.62,
"learning_rate": 4.615904990980157e-06,
"loss": 0.1672,
"step": 21500
},
{
"epoch": 1.62,
"learning_rate": 4.603377430346763e-06,
"loss": 0.1314,
"step": 21550
},
{
"epoch": 1.62,
"learning_rate": 4.59084986971337e-06,
"loss": 0.1535,
"step": 21600
},
{
"epoch": 1.63,
"learning_rate": 4.578322309079976e-06,
"loss": 0.1496,
"step": 21650
},
{
"epoch": 1.63,
"learning_rate": 4.565794748446583e-06,
"loss": 0.2039,
"step": 21700
},
{
"epoch": 1.63,
"learning_rate": 4.5532671878131895e-06,
"loss": 0.1683,
"step": 21750
},
{
"epoch": 1.64,
"learning_rate": 4.540739627179796e-06,
"loss": 0.1543,
"step": 21800
},
{
"epoch": 1.64,
"learning_rate": 4.528212066546403e-06,
"loss": 0.1674,
"step": 21850
},
{
"epoch": 1.65,
"learning_rate": 4.515684505913009e-06,
"loss": 0.1616,
"step": 21900
},
{
"epoch": 1.65,
"learning_rate": 4.503156945279615e-06,
"loss": 0.1564,
"step": 21950
},
{
"epoch": 1.65,
"learning_rate": 4.4906293846462215e-06,
"loss": 0.2053,
"step": 22000
},
{
"epoch": 1.66,
"learning_rate": 4.4781018240128285e-06,
"loss": 0.1983,
"step": 22050
},
{
"epoch": 1.66,
"learning_rate": 4.465574263379436e-06,
"loss": 0.1356,
"step": 22100
},
{
"epoch": 1.66,
"learning_rate": 4.4532972539587095e-06,
"loss": 0.1438,
"step": 22150
},
{
"epoch": 1.67,
"learning_rate": 4.440769693325316e-06,
"loss": 0.1685,
"step": 22200
},
{
"epoch": 1.67,
"learning_rate": 4.428242132691923e-06,
"loss": 0.1975,
"step": 22250
},
{
"epoch": 1.68,
"learning_rate": 4.415714572058529e-06,
"loss": 0.154,
"step": 22300
},
{
"epoch": 1.68,
"learning_rate": 4.403187011425136e-06,
"loss": 0.1801,
"step": 22350
},
{
"epoch": 1.68,
"learning_rate": 4.390659450791742e-06,
"loss": 0.1704,
"step": 22400
},
{
"epoch": 1.69,
"learning_rate": 4.3781318901583485e-06,
"loss": 0.1482,
"step": 22450
},
{
"epoch": 1.69,
"learning_rate": 4.365604329524955e-06,
"loss": 0.1672,
"step": 22500
},
{
"epoch": 1.69,
"learning_rate": 4.353076768891562e-06,
"loss": 0.1976,
"step": 22550
},
{
"epoch": 1.7,
"learning_rate": 4.340549208258169e-06,
"loss": 0.1454,
"step": 22600
},
{
"epoch": 1.7,
"learning_rate": 4.328021647624775e-06,
"loss": 0.1509,
"step": 22650
},
{
"epoch": 1.71,
"learning_rate": 4.315494086991381e-06,
"loss": 0.188,
"step": 22700
},
{
"epoch": 1.71,
"learning_rate": 4.3029665263579875e-06,
"loss": 0.123,
"step": 22750
},
{
"epoch": 1.71,
"learning_rate": 4.290438965724595e-06,
"loss": 0.1235,
"step": 22800
},
{
"epoch": 1.72,
"learning_rate": 4.277911405091201e-06,
"loss": 0.1649,
"step": 22850
},
{
"epoch": 1.72,
"learning_rate": 4.265383844457808e-06,
"loss": 0.1943,
"step": 22900
},
{
"epoch": 1.73,
"learning_rate": 4.252856283824414e-06,
"loss": 0.2243,
"step": 22950
},
{
"epoch": 1.73,
"learning_rate": 4.24032872319102e-06,
"loss": 0.1624,
"step": 23000
},
{
"epoch": 1.73,
"learning_rate": 4.227801162557627e-06,
"loss": 0.1998,
"step": 23050
},
{
"epoch": 1.74,
"learning_rate": 4.215273601924234e-06,
"loss": 0.228,
"step": 23100
},
{
"epoch": 1.74,
"learning_rate": 4.20274604129084e-06,
"loss": 0.1541,
"step": 23150
},
{
"epoch": 1.74,
"learning_rate": 4.190218480657447e-06,
"loss": 0.1396,
"step": 23200
},
{
"epoch": 1.75,
"learning_rate": 4.177690920024053e-06,
"loss": 0.155,
"step": 23250
},
{
"epoch": 1.75,
"learning_rate": 4.16516335939066e-06,
"loss": 0.1451,
"step": 23300
},
{
"epoch": 1.76,
"learning_rate": 4.152635798757266e-06,
"loss": 0.1628,
"step": 23350
},
{
"epoch": 1.76,
"learning_rate": 4.140108238123873e-06,
"loss": 0.1594,
"step": 23400
},
{
"epoch": 1.76,
"learning_rate": 4.127580677490479e-06,
"loss": 0.1636,
"step": 23450
},
{
"epoch": 1.77,
"learning_rate": 4.1153036680697536e-06,
"loss": 0.1437,
"step": 23500
},
{
"epoch": 1.77,
"learning_rate": 4.102776107436361e-06,
"loss": 0.179,
"step": 23550
},
{
"epoch": 1.77,
"learning_rate": 4.090248546802967e-06,
"loss": 0.1833,
"step": 23600
},
{
"epoch": 1.78,
"learning_rate": 4.077720986169573e-06,
"loss": 0.1654,
"step": 23650
},
{
"epoch": 1.78,
"learning_rate": 4.065193425536179e-06,
"loss": 0.143,
"step": 23700
},
{
"epoch": 1.79,
"learning_rate": 4.052665864902786e-06,
"loss": 0.1591,
"step": 23750
},
{
"epoch": 1.79,
"learning_rate": 4.0401383042693934e-06,
"loss": 0.1652,
"step": 23800
},
{
"epoch": 1.79,
"learning_rate": 4.027610743636e-06,
"loss": 0.2014,
"step": 23850
},
{
"epoch": 1.8,
"learning_rate": 4.015083183002606e-06,
"loss": 0.1846,
"step": 23900
},
{
"epoch": 1.8,
"learning_rate": 4.002555622369212e-06,
"loss": 0.1707,
"step": 23950
},
{
"epoch": 1.8,
"learning_rate": 3.990028061735819e-06,
"loss": 0.1681,
"step": 24000
},
{
"epoch": 1.81,
"learning_rate": 3.977500501102426e-06,
"loss": 0.1255,
"step": 24050
},
{
"epoch": 1.81,
"learning_rate": 3.9649729404690325e-06,
"loss": 0.133,
"step": 24100
},
{
"epoch": 1.82,
"learning_rate": 3.952445379835639e-06,
"loss": 0.1571,
"step": 24150
},
{
"epoch": 1.82,
"learning_rate": 3.939917819202245e-06,
"loss": 0.1481,
"step": 24200
},
{
"epoch": 1.82,
"learning_rate": 3.927390258568852e-06,
"loss": 0.1768,
"step": 24250
},
{
"epoch": 1.83,
"learning_rate": 3.914862697935458e-06,
"loss": 0.1688,
"step": 24300
},
{
"epoch": 1.83,
"learning_rate": 3.902335137302064e-06,
"loss": 0.1771,
"step": 24350
},
{
"epoch": 1.83,
"learning_rate": 3.8898075766686715e-06,
"loss": 0.1335,
"step": 24400
},
{
"epoch": 1.84,
"learning_rate": 3.877280016035278e-06,
"loss": 0.1584,
"step": 24450
},
{
"epoch": 1.84,
"learning_rate": 3.864752455401885e-06,
"loss": 0.1639,
"step": 24500
},
{
"epoch": 1.85,
"learning_rate": 3.852224894768491e-06,
"loss": 0.2169,
"step": 24550
},
{
"epoch": 1.85,
"learning_rate": 3.839697334135097e-06,
"loss": 0.172,
"step": 24600
},
{
"epoch": 1.85,
"learning_rate": 3.8271697735017034e-06,
"loss": 0.1578,
"step": 24650
},
{
"epoch": 1.86,
"learning_rate": 3.814642212868311e-06,
"loss": 0.2391,
"step": 24700
},
{
"epoch": 1.86,
"learning_rate": 3.802114652234917e-06,
"loss": 0.1298,
"step": 24750
},
{
"epoch": 1.86,
"learning_rate": 3.789587091601524e-06,
"loss": 0.1388,
"step": 24800
},
{
"epoch": 1.87,
"learning_rate": 3.77705953096813e-06,
"loss": 0.1849,
"step": 24850
},
{
"epoch": 1.87,
"learning_rate": 3.7645319703347367e-06,
"loss": 0.1633,
"step": 24900
},
{
"epoch": 1.88,
"learning_rate": 3.752004409701343e-06,
"loss": 0.1805,
"step": 24950
},
{
"epoch": 1.88,
"learning_rate": 3.73947684906795e-06,
"loss": 0.1458,
"step": 25000
},
{
"epoch": 1.88,
"learning_rate": 3.7269492884345566e-06,
"loss": 0.1759,
"step": 25050
},
{
"epoch": 1.89,
"learning_rate": 3.714421727801163e-06,
"loss": 0.1507,
"step": 25100
},
{
"epoch": 1.89,
"learning_rate": 3.7018941671677695e-06,
"loss": 0.1418,
"step": 25150
},
{
"epoch": 1.89,
"learning_rate": 3.6893666065343757e-06,
"loss": 0.159,
"step": 25200
},
{
"epoch": 1.9,
"learning_rate": 3.6768390459009823e-06,
"loss": 0.1676,
"step": 25250
},
{
"epoch": 1.9,
"learning_rate": 3.6643114852675886e-06,
"loss": 0.1894,
"step": 25300
},
{
"epoch": 1.91,
"learning_rate": 3.6517839246341956e-06,
"loss": 0.1964,
"step": 25350
},
{
"epoch": 1.91,
"learning_rate": 3.6392563640008023e-06,
"loss": 0.1178,
"step": 25400
},
{
"epoch": 1.91,
"learning_rate": 3.6267288033674085e-06,
"loss": 0.1419,
"step": 25450
},
{
"epoch": 1.92,
"learning_rate": 3.614201242734015e-06,
"loss": 0.1724,
"step": 25500
},
{
"epoch": 1.92,
"learning_rate": 3.6016736821006214e-06,
"loss": 0.1865,
"step": 25550
},
{
"epoch": 1.92,
"learning_rate": 3.589146121467228e-06,
"loss": 0.1528,
"step": 25600
},
{
"epoch": 1.93,
"learning_rate": 3.576618560833835e-06,
"loss": 0.1541,
"step": 25650
},
{
"epoch": 1.93,
"learning_rate": 3.5640910002004413e-06,
"loss": 0.1823,
"step": 25700
},
{
"epoch": 1.94,
"learning_rate": 3.551563439567048e-06,
"loss": 0.154,
"step": 25750
},
{
"epoch": 1.94,
"learning_rate": 3.539035878933654e-06,
"loss": 0.1809,
"step": 25800
},
{
"epoch": 1.94,
"learning_rate": 3.526508318300261e-06,
"loss": 0.1521,
"step": 25850
},
{
"epoch": 1.95,
"learning_rate": 3.513980757666867e-06,
"loss": 0.1799,
"step": 25900
},
{
"epoch": 1.95,
"learning_rate": 3.501453197033474e-06,
"loss": 0.1869,
"step": 25950
},
{
"epoch": 1.95,
"learning_rate": 3.4889256364000808e-06,
"loss": 0.1046,
"step": 26000
},
{
"epoch": 1.96,
"learning_rate": 3.476398075766687e-06,
"loss": 0.1949,
"step": 26050
},
{
"epoch": 1.96,
"learning_rate": 3.4638705151332936e-06,
"loss": 0.1614,
"step": 26100
},
{
"epoch": 1.97,
"learning_rate": 3.4513429544999e-06,
"loss": 0.1762,
"step": 26150
},
{
"epoch": 1.97,
"learning_rate": 3.4388153938665065e-06,
"loss": 0.1222,
"step": 26200
},
{
"epoch": 1.97,
"learning_rate": 3.4262878332331127e-06,
"loss": 0.182,
"step": 26250
},
{
"epoch": 1.98,
"learning_rate": 3.4137602725997198e-06,
"loss": 0.1699,
"step": 26300
},
{
"epoch": 1.98,
"learning_rate": 3.4012327119663264e-06,
"loss": 0.125,
"step": 26350
},
{
"epoch": 1.98,
"learning_rate": 3.3887051513329327e-06,
"loss": 0.1787,
"step": 26400
},
{
"epoch": 1.99,
"learning_rate": 3.3761775906995393e-06,
"loss": 0.1633,
"step": 26450
},
{
"epoch": 1.99,
"learning_rate": 3.3636500300661455e-06,
"loss": 0.1523,
"step": 26500
},
{
"epoch": 2.0,
"learning_rate": 3.351122469432752e-06,
"loss": 0.1927,
"step": 26550
},
{
"epoch": 2.0,
"learning_rate": 3.3385949087993592e-06,
"loss": 0.1643,
"step": 26600
},
{
"epoch": 2.0,
"exact_match": 51.08038029386344,
"f1": 59.25556094017093,
"step": 26608
},
{
"epoch": 2.0,
"learning_rate": 3.3260673481659655e-06,
"loss": 0.0722,
"step": 26650
},
{
"epoch": 2.01,
"learning_rate": 3.313539787532572e-06,
"loss": 0.1278,
"step": 26700
},
{
"epoch": 2.01,
"learning_rate": 3.3010122268991783e-06,
"loss": 0.0787,
"step": 26750
},
{
"epoch": 2.01,
"learning_rate": 3.288484666265785e-06,
"loss": 0.0707,
"step": 26800
},
{
"epoch": 2.02,
"learning_rate": 3.275957105632391e-06,
"loss": 0.1228,
"step": 26850
},
{
"epoch": 2.02,
"learning_rate": 3.2634295449989983e-06,
"loss": 0.1,
"step": 26900
},
{
"epoch": 2.03,
"learning_rate": 3.250901984365605e-06,
"loss": 0.1194,
"step": 26950
},
{
"epoch": 2.03,
"learning_rate": 3.238374423732211e-06,
"loss": 0.0841,
"step": 27000
},
{
"epoch": 2.03,
"learning_rate": 3.2258468630988178e-06,
"loss": 0.1544,
"step": 27050
},
{
"epoch": 2.04,
"learning_rate": 3.213319302465424e-06,
"loss": 0.0739,
"step": 27100
},
{
"epoch": 2.04,
"learning_rate": 3.2007917418320306e-06,
"loss": 0.1805,
"step": 27150
},
{
"epoch": 2.04,
"learning_rate": 3.188264181198637e-06,
"loss": 0.0618,
"step": 27200
},
{
"epoch": 2.05,
"learning_rate": 3.175736620565244e-06,
"loss": 0.0966,
"step": 27250
},
{
"epoch": 2.05,
"learning_rate": 3.1632090599318506e-06,
"loss": 0.1268,
"step": 27300
},
{
"epoch": 2.06,
"learning_rate": 3.150681499298457e-06,
"loss": 0.0946,
"step": 27350
},
{
"epoch": 2.06,
"learning_rate": 3.1381539386650634e-06,
"loss": 0.0949,
"step": 27400
},
{
"epoch": 2.06,
"learning_rate": 3.1256263780316697e-06,
"loss": 0.0996,
"step": 27450
},
{
"epoch": 2.07,
"learning_rate": 3.1130988173982763e-06,
"loss": 0.0612,
"step": 27500
},
{
"epoch": 2.07,
"learning_rate": 3.1005712567648834e-06,
"loss": 0.0901,
"step": 27550
},
{
"epoch": 2.07,
"learning_rate": 3.0880436961314896e-06,
"loss": 0.1236,
"step": 27600
},
{
"epoch": 2.08,
"learning_rate": 3.0755161354980963e-06,
"loss": 0.0711,
"step": 27650
},
{
"epoch": 2.08,
"learning_rate": 3.0629885748647025e-06,
"loss": 0.0429,
"step": 27700
},
{
"epoch": 2.09,
"learning_rate": 3.050461014231309e-06,
"loss": 0.1027,
"step": 27750
},
{
"epoch": 2.09,
"learning_rate": 3.0379334535979153e-06,
"loss": 0.0642,
"step": 27800
},
{
"epoch": 2.09,
"learning_rate": 3.0254058929645224e-06,
"loss": 0.127,
"step": 27850
},
{
"epoch": 2.1,
"learning_rate": 3.012878332331129e-06,
"loss": 0.1012,
"step": 27900
},
{
"epoch": 2.1,
"learning_rate": 3.0003507716977353e-06,
"loss": 0.0799,
"step": 27950
},
{
"epoch": 2.1,
"learning_rate": 2.987823211064342e-06,
"loss": 0.0823,
"step": 28000
},
{
"epoch": 2.11,
"learning_rate": 2.975295650430948e-06,
"loss": 0.1013,
"step": 28050
},
{
"epoch": 2.11,
"learning_rate": 2.962768089797555e-06,
"loss": 0.0876,
"step": 28100
},
{
"epoch": 2.12,
"learning_rate": 2.950240529164161e-06,
"loss": 0.1052,
"step": 28150
},
{
"epoch": 2.12,
"learning_rate": 2.937712968530768e-06,
"loss": 0.0826,
"step": 28200
},
{
"epoch": 2.12,
"learning_rate": 2.9251854078973747e-06,
"loss": 0.0864,
"step": 28250
},
{
"epoch": 2.13,
"learning_rate": 2.912657847263981e-06,
"loss": 0.0799,
"step": 28300
},
{
"epoch": 2.13,
"learning_rate": 2.9001302866305876e-06,
"loss": 0.1057,
"step": 28350
},
{
"epoch": 2.13,
"learning_rate": 2.887602725997194e-06,
"loss": 0.091,
"step": 28400
},
{
"epoch": 2.14,
"learning_rate": 2.8750751653638005e-06,
"loss": 0.0827,
"step": 28450
},
{
"epoch": 2.14,
"learning_rate": 2.8625476047304075e-06,
"loss": 0.1339,
"step": 28500
},
{
"epoch": 2.15,
"learning_rate": 2.8500200440970138e-06,
"loss": 0.1096,
"step": 28550
},
{
"epoch": 2.15,
"learning_rate": 2.8374924834636204e-06,
"loss": 0.1513,
"step": 28600
},
{
"epoch": 2.15,
"learning_rate": 2.8249649228302266e-06,
"loss": 0.0939,
"step": 28650
},
{
"epoch": 2.16,
"learning_rate": 2.8124373621968333e-06,
"loss": 0.1388,
"step": 28700
},
{
"epoch": 2.16,
"learning_rate": 2.7999098015634395e-06,
"loss": 0.1259,
"step": 28750
},
{
"epoch": 2.16,
"learning_rate": 2.787382240930046e-06,
"loss": 0.1103,
"step": 28800
},
{
"epoch": 2.17,
"learning_rate": 2.775105231509321e-06,
"loss": 0.1445,
"step": 28850
},
{
"epoch": 2.17,
"learning_rate": 2.762577670875927e-06,
"loss": 0.0697,
"step": 28900
},
{
"epoch": 2.18,
"learning_rate": 2.7500501102425337e-06,
"loss": 0.1085,
"step": 28950
},
{
"epoch": 2.18,
"learning_rate": 2.73752254960914e-06,
"loss": 0.074,
"step": 29000
},
{
"epoch": 2.18,
"learning_rate": 2.724994988975747e-06,
"loss": 0.0952,
"step": 29050
},
{
"epoch": 2.19,
"learning_rate": 2.7124674283423536e-06,
"loss": 0.0524,
"step": 29100
},
{
"epoch": 2.19,
"learning_rate": 2.69993986770896e-06,
"loss": 0.0844,
"step": 29150
},
{
"epoch": 2.19,
"learning_rate": 2.6874123070755665e-06,
"loss": 0.1059,
"step": 29200
},
{
"epoch": 2.2,
"learning_rate": 2.6748847464421727e-06,
"loss": 0.1191,
"step": 29250
},
{
"epoch": 2.2,
"learning_rate": 2.6623571858087794e-06,
"loss": 0.113,
"step": 29300
},
{
"epoch": 2.21,
"learning_rate": 2.6498296251753865e-06,
"loss": 0.1183,
"step": 29350
},
{
"epoch": 2.21,
"learning_rate": 2.6373020645419927e-06,
"loss": 0.1229,
"step": 29400
},
{
"epoch": 2.21,
"learning_rate": 2.6247745039085993e-06,
"loss": 0.1276,
"step": 29450
},
{
"epoch": 2.22,
"learning_rate": 2.6122469432752055e-06,
"loss": 0.1084,
"step": 29500
},
{
"epoch": 2.22,
"learning_rate": 2.599719382641812e-06,
"loss": 0.0889,
"step": 29550
},
{
"epoch": 2.22,
"learning_rate": 2.5871918220084184e-06,
"loss": 0.098,
"step": 29600
},
{
"epoch": 2.23,
"learning_rate": 2.574664261375025e-06,
"loss": 0.0969,
"step": 29650
},
{
"epoch": 2.23,
"learning_rate": 2.562136700741632e-06,
"loss": 0.0944,
"step": 29700
},
{
"epoch": 2.24,
"learning_rate": 2.5496091401082383e-06,
"loss": 0.1021,
"step": 29750
},
{
"epoch": 2.24,
"learning_rate": 2.537081579474845e-06,
"loss": 0.0927,
"step": 29800
},
{
"epoch": 2.24,
"learning_rate": 2.5245540188414512e-06,
"loss": 0.0985,
"step": 29850
},
{
"epoch": 2.25,
"learning_rate": 2.512026458208058e-06,
"loss": 0.1503,
"step": 29900
},
{
"epoch": 2.25,
"learning_rate": 2.4994988975746645e-06,
"loss": 0.0803,
"step": 29950
},
{
"epoch": 2.25,
"learning_rate": 2.486971336941271e-06,
"loss": 0.1176,
"step": 30000
},
{
"epoch": 2.26,
"learning_rate": 2.4744437763078774e-06,
"loss": 0.0791,
"step": 30050
},
{
"epoch": 2.26,
"learning_rate": 2.461916215674484e-06,
"loss": 0.0985,
"step": 30100
},
{
"epoch": 2.27,
"learning_rate": 2.4493886550410907e-06,
"loss": 0.1402,
"step": 30150
},
{
"epoch": 2.27,
"learning_rate": 2.436861094407697e-06,
"loss": 0.0957,
"step": 30200
},
{
"epoch": 2.27,
"learning_rate": 2.424333533774304e-06,
"loss": 0.1503,
"step": 30250
},
{
"epoch": 2.28,
"learning_rate": 2.41180597314091e-06,
"loss": 0.1002,
"step": 30300
},
{
"epoch": 2.28,
"learning_rate": 2.399278412507517e-06,
"loss": 0.109,
"step": 30350
},
{
"epoch": 2.29,
"learning_rate": 2.3867508518741235e-06,
"loss": 0.1081,
"step": 30400
},
{
"epoch": 2.29,
"learning_rate": 2.3742232912407297e-06,
"loss": 0.1553,
"step": 30450
},
{
"epoch": 2.29,
"learning_rate": 2.3616957306073363e-06,
"loss": 0.0772,
"step": 30500
},
{
"epoch": 2.3,
"learning_rate": 2.349168169973943e-06,
"loss": 0.1092,
"step": 30550
},
{
"epoch": 2.3,
"learning_rate": 2.3366406093405496e-06,
"loss": 0.0924,
"step": 30600
},
{
"epoch": 2.3,
"learning_rate": 2.324113048707156e-06,
"loss": 0.0791,
"step": 30650
},
{
"epoch": 2.31,
"learning_rate": 2.3115854880737625e-06,
"loss": 0.1556,
"step": 30700
},
{
"epoch": 2.31,
"learning_rate": 2.299057927440369e-06,
"loss": 0.1471,
"step": 30750
},
{
"epoch": 2.32,
"learning_rate": 2.2865303668069754e-06,
"loss": 0.1204,
"step": 30800
},
{
"epoch": 2.32,
"learning_rate": 2.274002806173582e-06,
"loss": 0.1114,
"step": 30850
},
{
"epoch": 2.32,
"learning_rate": 2.2614752455401887e-06,
"loss": 0.0769,
"step": 30900
},
{
"epoch": 2.33,
"learning_rate": 2.2489476849067953e-06,
"loss": 0.0799,
"step": 30950
},
{
"epoch": 2.33,
"learning_rate": 2.2364201242734015e-06,
"loss": 0.1004,
"step": 31000
},
{
"epoch": 2.33,
"learning_rate": 2.224143114852676e-06,
"loss": 0.0891,
"step": 31050
},
{
"epoch": 2.34,
"learning_rate": 2.211615554219283e-06,
"loss": 0.0904,
"step": 31100
},
{
"epoch": 2.34,
"learning_rate": 2.199087993585889e-06,
"loss": 0.102,
"step": 31150
},
{
"epoch": 2.35,
"learning_rate": 2.1865604329524957e-06,
"loss": 0.0646,
"step": 31200
},
{
"epoch": 2.35,
"learning_rate": 2.1740328723191024e-06,
"loss": 0.0763,
"step": 31250
},
{
"epoch": 2.35,
"learning_rate": 2.1615053116857086e-06,
"loss": 0.1146,
"step": 31300
},
{
"epoch": 2.36,
"learning_rate": 2.1489777510523153e-06,
"loss": 0.0961,
"step": 31350
},
{
"epoch": 2.36,
"learning_rate": 2.136450190418922e-06,
"loss": 0.0968,
"step": 31400
},
{
"epoch": 2.36,
"learning_rate": 2.1239226297855285e-06,
"loss": 0.0856,
"step": 31450
},
{
"epoch": 2.37,
"learning_rate": 2.1113950691521348e-06,
"loss": 0.1081,
"step": 31500
},
{
"epoch": 2.37,
"learning_rate": 2.0988675085187414e-06,
"loss": 0.1148,
"step": 31550
},
{
"epoch": 2.38,
"learning_rate": 2.086339947885348e-06,
"loss": 0.0963,
"step": 31600
},
{
"epoch": 2.38,
"learning_rate": 2.0738123872519543e-06,
"loss": 0.0929,
"step": 31650
},
{
"epoch": 2.38,
"learning_rate": 2.061284826618561e-06,
"loss": 0.1196,
"step": 31700
},
{
"epoch": 2.39,
"learning_rate": 2.0487572659851676e-06,
"loss": 0.0971,
"step": 31750
},
{
"epoch": 2.39,
"learning_rate": 2.0362297053517742e-06,
"loss": 0.1274,
"step": 31800
},
{
"epoch": 2.39,
"learning_rate": 2.0237021447183804e-06,
"loss": 0.0692,
"step": 31850
},
{
"epoch": 2.4,
"learning_rate": 2.011174584084987e-06,
"loss": 0.0835,
"step": 31900
},
{
"epoch": 2.4,
"learning_rate": 1.9986470234515937e-06,
"loss": 0.1217,
"step": 31950
},
{
"epoch": 2.41,
"learning_rate": 1.9861194628182e-06,
"loss": 0.0869,
"step": 32000
},
{
"epoch": 2.41,
"learning_rate": 1.973591902184807e-06,
"loss": 0.0988,
"step": 32050
},
{
"epoch": 2.41,
"learning_rate": 1.9610643415514132e-06,
"loss": 0.1166,
"step": 32100
},
{
"epoch": 2.42,
"learning_rate": 1.94853678091802e-06,
"loss": 0.0718,
"step": 32150
},
{
"epoch": 2.42,
"learning_rate": 1.9360092202846265e-06,
"loss": 0.0431,
"step": 32200
},
{
"epoch": 2.42,
"learning_rate": 1.9234816596512328e-06,
"loss": 0.0896,
"step": 32250
},
{
"epoch": 2.43,
"learning_rate": 1.9109540990178394e-06,
"loss": 0.0971,
"step": 32300
},
{
"epoch": 2.43,
"learning_rate": 1.898426538384446e-06,
"loss": 0.0939,
"step": 32350
},
{
"epoch": 2.44,
"learning_rate": 1.8858989777510525e-06,
"loss": 0.1138,
"step": 32400
},
{
"epoch": 2.44,
"learning_rate": 1.873371417117659e-06,
"loss": 0.0826,
"step": 32450
},
{
"epoch": 2.44,
"learning_rate": 1.8608438564842653e-06,
"loss": 0.0847,
"step": 32500
},
{
"epoch": 2.45,
"learning_rate": 1.8483162958508722e-06,
"loss": 0.112,
"step": 32550
},
{
"epoch": 2.45,
"learning_rate": 1.8357887352174786e-06,
"loss": 0.126,
"step": 32600
},
{
"epoch": 2.45,
"learning_rate": 1.823261174584085e-06,
"loss": 0.0649,
"step": 32650
},
{
"epoch": 2.46,
"learning_rate": 1.8107336139506917e-06,
"loss": 0.1242,
"step": 32700
},
{
"epoch": 2.46,
"learning_rate": 1.7982060533172982e-06,
"loss": 0.1085,
"step": 32750
},
{
"epoch": 2.47,
"learning_rate": 1.7856784926839046e-06,
"loss": 0.0953,
"step": 32800
},
{
"epoch": 2.47,
"learning_rate": 1.7731509320505114e-06,
"loss": 0.0651,
"step": 32850
},
{
"epoch": 2.47,
"learning_rate": 1.7606233714171179e-06,
"loss": 0.1182,
"step": 32900
},
{
"epoch": 2.48,
"learning_rate": 1.7480958107837243e-06,
"loss": 0.0717,
"step": 32950
},
{
"epoch": 2.48,
"learning_rate": 1.735568250150331e-06,
"loss": 0.1015,
"step": 33000
},
{
"epoch": 2.48,
"learning_rate": 1.7230406895169374e-06,
"loss": 0.1009,
"step": 33050
},
{
"epoch": 2.49,
"learning_rate": 1.7105131288835438e-06,
"loss": 0.0879,
"step": 33100
},
{
"epoch": 2.49,
"learning_rate": 1.6979855682501507e-06,
"loss": 0.1216,
"step": 33150
},
{
"epoch": 2.5,
"learning_rate": 1.6854580076167571e-06,
"loss": 0.0753,
"step": 33200
},
{
"epoch": 2.5,
"learning_rate": 1.6729304469833636e-06,
"loss": 0.0711,
"step": 33250
},
{
"epoch": 2.5,
"learning_rate": 1.66040288634997e-06,
"loss": 0.1099,
"step": 33300
},
{
"epoch": 2.51,
"learning_rate": 1.6478753257165766e-06,
"loss": 0.129,
"step": 33350
},
{
"epoch": 2.51,
"learning_rate": 1.635347765083183e-06,
"loss": 0.0877,
"step": 33400
},
{
"epoch": 2.51,
"learning_rate": 1.6228202044497895e-06,
"loss": 0.1112,
"step": 33450
},
{
"epoch": 2.52,
"learning_rate": 1.6102926438163964e-06,
"loss": 0.1186,
"step": 33500
},
{
"epoch": 2.52,
"learning_rate": 1.5977650831830028e-06,
"loss": 0.0815,
"step": 33550
},
{
"epoch": 2.53,
"learning_rate": 1.5852375225496092e-06,
"loss": 0.1255,
"step": 33600
},
{
"epoch": 2.53,
"learning_rate": 1.5727099619162159e-06,
"loss": 0.0942,
"step": 33650
},
{
"epoch": 2.53,
"learning_rate": 1.5601824012828223e-06,
"loss": 0.1259,
"step": 33700
},
{
"epoch": 2.54,
"learning_rate": 1.5476548406494287e-06,
"loss": 0.0945,
"step": 33750
},
{
"epoch": 2.54,
"learning_rate": 1.5351272800160356e-06,
"loss": 0.0805,
"step": 33800
},
{
"epoch": 2.54,
"learning_rate": 1.522599719382642e-06,
"loss": 0.0851,
"step": 33850
},
{
"epoch": 2.55,
"learning_rate": 1.5100721587492485e-06,
"loss": 0.0815,
"step": 33900
},
{
"epoch": 2.55,
"learning_rate": 1.4975445981158551e-06,
"loss": 0.101,
"step": 33950
},
{
"epoch": 2.56,
"learning_rate": 1.4850170374824615e-06,
"loss": 0.1006,
"step": 34000
},
{
"epoch": 2.56,
"learning_rate": 1.472489476849068e-06,
"loss": 0.0759,
"step": 34050
},
{
"epoch": 2.56,
"learning_rate": 1.4599619162156748e-06,
"loss": 0.0765,
"step": 34100
},
{
"epoch": 2.57,
"learning_rate": 1.4474343555822813e-06,
"loss": 0.0956,
"step": 34150
},
{
"epoch": 2.57,
"learning_rate": 1.4349067949488877e-06,
"loss": 0.1312,
"step": 34200
},
{
"epoch": 2.57,
"learning_rate": 1.4223792343154941e-06,
"loss": 0.1217,
"step": 34250
},
{
"epoch": 2.58,
"learning_rate": 1.4098516736821008e-06,
"loss": 0.1011,
"step": 34300
},
{
"epoch": 2.58,
"learning_rate": 1.3973241130487072e-06,
"loss": 0.0896,
"step": 34350
},
{
"epoch": 2.59,
"learning_rate": 1.3847965524153136e-06,
"loss": 0.07,
"step": 34400
},
{
"epoch": 2.59,
"learning_rate": 1.3722689917819205e-06,
"loss": 0.0848,
"step": 34450
},
{
"epoch": 2.59,
"learning_rate": 1.359741431148527e-06,
"loss": 0.0787,
"step": 34500
},
{
"epoch": 2.6,
"learning_rate": 1.3472138705151334e-06,
"loss": 0.0794,
"step": 34550
},
{
"epoch": 2.6,
"learning_rate": 1.33468630988174e-06,
"loss": 0.0896,
"step": 34600
},
{
"epoch": 2.6,
"learning_rate": 1.3221587492483465e-06,
"loss": 0.0942,
"step": 34650
},
{
"epoch": 2.61,
"learning_rate": 1.3096311886149529e-06,
"loss": 0.0948,
"step": 34700
},
{
"epoch": 2.61,
"learning_rate": 1.2971036279815597e-06,
"loss": 0.1178,
"step": 34750
},
{
"epoch": 2.62,
"learning_rate": 1.2845760673481662e-06,
"loss": 0.0669,
"step": 34800
},
{
"epoch": 2.62,
"learning_rate": 1.2720485067147726e-06,
"loss": 0.1142,
"step": 34850
},
{
"epoch": 2.62,
"learning_rate": 1.2595209460813793e-06,
"loss": 0.0687,
"step": 34900
},
{
"epoch": 2.63,
"learning_rate": 1.2469933854479857e-06,
"loss": 0.1004,
"step": 34950
},
{
"epoch": 2.63,
"learning_rate": 1.2344658248145921e-06,
"loss": 0.0904,
"step": 35000
},
{
"epoch": 2.63,
"learning_rate": 1.2219382641811988e-06,
"loss": 0.092,
"step": 35050
},
{
"epoch": 2.64,
"learning_rate": 1.2094107035478054e-06,
"loss": 0.0983,
"step": 35100
},
{
"epoch": 2.64,
"learning_rate": 1.1968831429144118e-06,
"loss": 0.0464,
"step": 35150
},
{
"epoch": 2.65,
"learning_rate": 1.1843555822810183e-06,
"loss": 0.1265,
"step": 35200
},
{
"epoch": 2.65,
"learning_rate": 1.171828021647625e-06,
"loss": 0.0809,
"step": 35250
},
{
"epoch": 2.65,
"learning_rate": 1.1595510122268992e-06,
"loss": 0.1048,
"step": 35300
},
{
"epoch": 2.66,
"learning_rate": 1.1472740028061737e-06,
"loss": 0.1366,
"step": 35350
},
{
"epoch": 2.66,
"learning_rate": 1.1347464421727801e-06,
"loss": 0.1373,
"step": 35400
},
{
"epoch": 2.66,
"learning_rate": 1.1222188815393868e-06,
"loss": 0.0448,
"step": 35450
},
{
"epoch": 2.67,
"learning_rate": 1.1096913209059932e-06,
"loss": 0.062,
"step": 35500
},
{
"epoch": 2.67,
"learning_rate": 1.0971637602725999e-06,
"loss": 0.0721,
"step": 35550
},
{
"epoch": 2.68,
"learning_rate": 1.0846361996392063e-06,
"loss": 0.0927,
"step": 35600
},
{
"epoch": 2.68,
"learning_rate": 1.072108639005813e-06,
"loss": 0.077,
"step": 35650
},
{
"epoch": 2.68,
"learning_rate": 1.0595810783724194e-06,
"loss": 0.095,
"step": 35700
},
{
"epoch": 2.69,
"learning_rate": 1.0470535177390258e-06,
"loss": 0.1275,
"step": 35750
},
{
"epoch": 2.69,
"learning_rate": 1.0345259571056325e-06,
"loss": 0.1027,
"step": 35800
},
{
"epoch": 2.69,
"learning_rate": 1.0219983964722389e-06,
"loss": 0.0658,
"step": 35850
},
{
"epoch": 2.7,
"learning_rate": 1.0094708358388455e-06,
"loss": 0.1137,
"step": 35900
},
{
"epoch": 2.7,
"learning_rate": 9.969432752054522e-07,
"loss": 0.0858,
"step": 35950
},
{
"epoch": 2.71,
"learning_rate": 9.844157145720586e-07,
"loss": 0.1581,
"step": 36000
},
{
"epoch": 2.71,
"learning_rate": 9.71888153938665e-07,
"loss": 0.1337,
"step": 36050
},
{
"epoch": 2.71,
"learning_rate": 9.593605933052717e-07,
"loss": 0.1082,
"step": 36100
},
{
"epoch": 2.72,
"learning_rate": 9.468330326718781e-07,
"loss": 0.0852,
"step": 36150
},
{
"epoch": 2.72,
"learning_rate": 9.343054720384848e-07,
"loss": 0.1358,
"step": 36200
},
{
"epoch": 2.72,
"learning_rate": 9.217779114050912e-07,
"loss": 0.1008,
"step": 36250
},
{
"epoch": 2.73,
"learning_rate": 9.092503507716977e-07,
"loss": 0.0993,
"step": 36300
},
{
"epoch": 2.73,
"learning_rate": 8.967227901383044e-07,
"loss": 0.0818,
"step": 36350
},
{
"epoch": 2.74,
"learning_rate": 8.841952295049108e-07,
"loss": 0.034,
"step": 36400
},
{
"epoch": 2.74,
"learning_rate": 8.716676688715174e-07,
"loss": 0.1407,
"step": 36450
},
{
"epoch": 2.74,
"learning_rate": 8.59140108238124e-07,
"loss": 0.1252,
"step": 36500
},
{
"epoch": 2.75,
"learning_rate": 8.466125476047304e-07,
"loss": 0.1026,
"step": 36550
},
{
"epoch": 2.75,
"learning_rate": 8.34084986971337e-07,
"loss": 0.1041,
"step": 36600
},
{
"epoch": 2.75,
"learning_rate": 8.215574263379436e-07,
"loss": 0.0534,
"step": 36650
},
{
"epoch": 2.76,
"learning_rate": 8.090298657045501e-07,
"loss": 0.078,
"step": 36700
},
{
"epoch": 2.76,
"learning_rate": 7.965023050711566e-07,
"loss": 0.0818,
"step": 36750
},
{
"epoch": 2.77,
"learning_rate": 7.83974744437763e-07,
"loss": 0.1133,
"step": 36800
},
{
"epoch": 2.77,
"learning_rate": 7.714471838043697e-07,
"loss": 0.0813,
"step": 36850
},
{
"epoch": 2.77,
"learning_rate": 7.589196231709762e-07,
"loss": 0.0808,
"step": 36900
},
{
"epoch": 2.78,
"learning_rate": 7.463920625375827e-07,
"loss": 0.112,
"step": 36950
},
{
"epoch": 2.78,
"learning_rate": 7.338645019041893e-07,
"loss": 0.1386,
"step": 37000
},
{
"epoch": 2.78,
"learning_rate": 7.213369412707958e-07,
"loss": 0.0884,
"step": 37050
},
{
"epoch": 2.79,
"learning_rate": 7.088093806374023e-07,
"loss": 0.0881,
"step": 37100
},
{
"epoch": 2.79,
"learning_rate": 6.962818200040089e-07,
"loss": 0.0914,
"step": 37150
},
{
"epoch": 2.8,
"learning_rate": 6.837542593706154e-07,
"loss": 0.0976,
"step": 37200
},
{
"epoch": 2.8,
"learning_rate": 6.712266987372219e-07,
"loss": 0.0754,
"step": 37250
},
{
"epoch": 2.8,
"learning_rate": 6.586991381038285e-07,
"loss": 0.0551,
"step": 37300
},
{
"epoch": 2.81,
"learning_rate": 6.46171577470435e-07,
"loss": 0.0751,
"step": 37350
},
{
"epoch": 2.81,
"learning_rate": 6.336440168370415e-07,
"loss": 0.0754,
"step": 37400
},
{
"epoch": 2.81,
"learning_rate": 6.211164562036481e-07,
"loss": 0.1558,
"step": 37450
},
{
"epoch": 2.82,
"learning_rate": 6.085888955702546e-07,
"loss": 0.0613,
"step": 37500
},
{
"epoch": 2.82,
"learning_rate": 5.960613349368611e-07,
"loss": 0.1061,
"step": 37550
},
{
"epoch": 2.83,
"learning_rate": 5.835337743034677e-07,
"loss": 0.1007,
"step": 37600
},
{
"epoch": 2.83,
"learning_rate": 5.710062136700742e-07,
"loss": 0.0988,
"step": 37650
},
{
"epoch": 2.83,
"learning_rate": 5.584786530366808e-07,
"loss": 0.093,
"step": 37700
},
{
"epoch": 2.84,
"learning_rate": 5.459510924032873e-07,
"loss": 0.0538,
"step": 37750
},
{
"epoch": 2.84,
"learning_rate": 5.334235317698938e-07,
"loss": 0.1133,
"step": 37800
},
{
"epoch": 2.85,
"learning_rate": 5.208959711365004e-07,
"loss": 0.0627,
"step": 37850
},
{
"epoch": 2.85,
"learning_rate": 5.083684105031069e-07,
"loss": 0.1405,
"step": 37900
},
{
"epoch": 2.85,
"learning_rate": 4.958408498697135e-07,
"loss": 0.052,
"step": 37950
},
{
"epoch": 2.86,
"learning_rate": 4.8331328923632e-07,
"loss": 0.0724,
"step": 38000
},
{
"epoch": 2.86,
"learning_rate": 4.707857286029265e-07,
"loss": 0.0511,
"step": 38050
},
{
"epoch": 2.86,
"learning_rate": 4.58258167969533e-07,
"loss": 0.1113,
"step": 38100
},
{
"epoch": 2.87,
"learning_rate": 4.4573060733613956e-07,
"loss": 0.065,
"step": 38150
},
{
"epoch": 2.87,
"learning_rate": 4.3320304670274605e-07,
"loss": 0.0608,
"step": 38200
},
{
"epoch": 2.88,
"learning_rate": 4.206754860693526e-07,
"loss": 0.1273,
"step": 38250
},
{
"epoch": 2.88,
"learning_rate": 4.081479254359592e-07,
"loss": 0.0798,
"step": 38300
},
{
"epoch": 2.88,
"learning_rate": 3.9562036480256566e-07,
"loss": 0.089,
"step": 38350
},
{
"epoch": 2.89,
"learning_rate": 3.830928041691722e-07,
"loss": 0.0976,
"step": 38400
},
{
"epoch": 2.89,
"learning_rate": 3.7056524353577874e-07,
"loss": 0.1121,
"step": 38450
},
{
"epoch": 2.89,
"learning_rate": 3.580376829023853e-07,
"loss": 0.0719,
"step": 38500
},
{
"epoch": 2.9,
"learning_rate": 3.455101222689918e-07,
"loss": 0.0832,
"step": 38550
},
{
"epoch": 2.9,
"learning_rate": 3.329825616355983e-07,
"loss": 0.0517,
"step": 38600
},
{
"epoch": 2.91,
"learning_rate": 3.2045500100220485e-07,
"loss": 0.0982,
"step": 38650
},
{
"epoch": 2.91,
"learning_rate": 3.079274403688114e-07,
"loss": 0.115,
"step": 38700
},
{
"epoch": 2.91,
"learning_rate": 2.9539987973541793e-07,
"loss": 0.0898,
"step": 38750
},
{
"epoch": 2.92,
"learning_rate": 2.8287231910202447e-07,
"loss": 0.0668,
"step": 38800
},
{
"epoch": 2.92,
"learning_rate": 2.70344758468631e-07,
"loss": 0.0522,
"step": 38850
},
{
"epoch": 2.92,
"learning_rate": 2.5781719783523755e-07,
"loss": 0.1142,
"step": 38900
},
{
"epoch": 2.93,
"learning_rate": 2.452896372018441e-07,
"loss": 0.0738,
"step": 38950
},
{
"epoch": 2.93,
"learning_rate": 2.327620765684506e-07,
"loss": 0.1125,
"step": 39000
},
{
"epoch": 2.94,
"learning_rate": 2.2023451593505714e-07,
"loss": 0.104,
"step": 39050
},
{
"epoch": 2.94,
"learning_rate": 2.0770695530166365e-07,
"loss": 0.091,
"step": 39100
},
{
"epoch": 2.94,
"learning_rate": 1.9517939466827022e-07,
"loss": 0.0815,
"step": 39150
},
{
"epoch": 2.95,
"learning_rate": 1.8265183403487673e-07,
"loss": 0.089,
"step": 39200
},
{
"epoch": 2.95,
"learning_rate": 1.7012427340148327e-07,
"loss": 0.1148,
"step": 39250
},
{
"epoch": 2.95,
"learning_rate": 1.5759671276808984e-07,
"loss": 0.0725,
"step": 39300
},
{
"epoch": 2.96,
"learning_rate": 1.4506915213469635e-07,
"loss": 0.0447,
"step": 39350
},
{
"epoch": 2.96,
"learning_rate": 1.3254159150130287e-07,
"loss": 0.0976,
"step": 39400
},
{
"epoch": 2.97,
"learning_rate": 1.200140308679094e-07,
"loss": 0.0626,
"step": 39450
},
{
"epoch": 2.97,
"learning_rate": 1.0748647023451595e-07,
"loss": 0.1057,
"step": 39500
},
{
"epoch": 2.97,
"learning_rate": 9.495890960112247e-08,
"loss": 0.0659,
"step": 39550
},
{
"epoch": 2.98,
"learning_rate": 8.243134896772901e-08,
"loss": 0.113,
"step": 39600
},
{
"epoch": 2.98,
"learning_rate": 6.990378833433555e-08,
"loss": 0.0941,
"step": 39650
},
{
"epoch": 2.98,
"learning_rate": 5.762677891360995e-08,
"loss": 0.0771,
"step": 39700
},
{
"epoch": 2.99,
"learning_rate": 4.509921828021648e-08,
"loss": 0.0667,
"step": 39750
},
{
"epoch": 2.99,
"learning_rate": 3.2571657646823014e-08,
"loss": 0.0772,
"step": 39800
},
{
"epoch": 3.0,
"learning_rate": 2.0044097013429547e-08,
"loss": 0.08,
"step": 39850
},
{
"epoch": 3.0,
"learning_rate": 7.51653638003608e-09,
"loss": 0.0902,
"step": 39900
},
{
"epoch": 3.0,
"exact_match": 51.31086142322097,
"f1": 58.231894243296836,
"step": 39912
}
],
"max_steps": 39912,
"num_train_epochs": 3,
"total_flos": 4.447693873675008e+17,
"trial_name": null,
"trial_params": null
}