dynamic-minilmv2-L6-H384-squad1.1 / trainer_state.json
sguskin's picture
Upload trainer_state.json
1580f81
raw
history blame
38.2 kB
{
"best_metric": 89.28118310884184,
"best_model_checkpoint": "output/lat-minilm-layerdrop0.2/checkpoint-73500",
"epoch": 6.638966669677536,
"global_step": 73500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 2.9864510884292296e-05,
"loss": 1.2398,
"step": 500
},
{
"epoch": 0.05,
"eval_exact_match": 79.31882686849575,
"eval_f1": 87.06894613336114,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 2.972902176858459e-05,
"loss": 0.8042,
"step": 1000
},
{
"epoch": 0.09,
"eval_exact_match": 78.6565752128666,
"eval_f1": 86.87585899730593,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 2.9593532652876887e-05,
"loss": 0.7486,
"step": 1500
},
{
"epoch": 0.14,
"eval_exact_match": 79.20529801324503,
"eval_f1": 87.30527211803158,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 2.9458043537169182e-05,
"loss": 0.7114,
"step": 2000
},
{
"epoch": 0.18,
"eval_exact_match": 79.67833491012298,
"eval_f1": 87.49088042810736,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 2.9322554421461477e-05,
"loss": 0.7036,
"step": 2500
},
{
"epoch": 0.23,
"eval_exact_match": 79.3282876064333,
"eval_f1": 87.37950296154845,
"step": 2500
},
{
"epoch": 0.27,
"learning_rate": 2.9187065305753772e-05,
"loss": 0.6913,
"step": 3000
},
{
"epoch": 0.27,
"eval_exact_match": 79.17691579943235,
"eval_f1": 87.41529490669386,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 2.9051576190046068e-05,
"loss": 0.6968,
"step": 3500
},
{
"epoch": 0.32,
"eval_exact_match": 79.67833491012298,
"eval_f1": 87.43644498439642,
"step": 3500
},
{
"epoch": 0.36,
"learning_rate": 2.8916087074338363e-05,
"loss": 0.6308,
"step": 4000
},
{
"epoch": 0.36,
"eval_exact_match": 79.77294228949859,
"eval_f1": 87.71831118238406,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 2.8780597958630658e-05,
"loss": 0.6146,
"step": 4500
},
{
"epoch": 0.41,
"eval_exact_match": 79.63103122043519,
"eval_f1": 87.53578264477454,
"step": 4500
},
{
"epoch": 0.45,
"learning_rate": 2.864510884292295e-05,
"loss": 0.6345,
"step": 5000
},
{
"epoch": 0.45,
"eval_exact_match": 79.50804162724693,
"eval_f1": 87.48678120571361,
"step": 5000
},
{
"epoch": 0.5,
"learning_rate": 2.850961972721525e-05,
"loss": 0.6153,
"step": 5500
},
{
"epoch": 0.5,
"eval_exact_match": 79.2336802270577,
"eval_f1": 87.30066036186288,
"step": 5500
},
{
"epoch": 0.54,
"learning_rate": 2.8374130611507544e-05,
"loss": 0.5926,
"step": 6000
},
{
"epoch": 0.54,
"eval_exact_match": 79.82970671712394,
"eval_f1": 87.5917752792044,
"step": 6000
},
{
"epoch": 0.59,
"learning_rate": 2.823864149579984e-05,
"loss": 0.6109,
"step": 6500
},
{
"epoch": 0.59,
"eval_exact_match": 79.36613055818354,
"eval_f1": 87.43013058208933,
"step": 6500
},
{
"epoch": 0.63,
"learning_rate": 2.8103152380092135e-05,
"loss": 0.5848,
"step": 7000
},
{
"epoch": 0.63,
"eval_exact_match": 80.28382213812678,
"eval_f1": 88.06441604943116,
"step": 7000
},
{
"epoch": 0.68,
"learning_rate": 2.796766326438443e-05,
"loss": 0.5942,
"step": 7500
},
{
"epoch": 0.68,
"eval_exact_match": 80.27436140018922,
"eval_f1": 88.00500107620664,
"step": 7500
},
{
"epoch": 0.72,
"learning_rate": 2.783217414867672e-05,
"loss": 0.6163,
"step": 8000
},
{
"epoch": 0.72,
"eval_exact_match": 80.29328287606434,
"eval_f1": 88.10467448485997,
"step": 8000
},
{
"epoch": 0.77,
"learning_rate": 2.7696685032969017e-05,
"loss": 0.5844,
"step": 8500
},
{
"epoch": 0.77,
"eval_exact_match": 80.35004730368969,
"eval_f1": 88.01050128741105,
"step": 8500
},
{
"epoch": 0.81,
"learning_rate": 2.7561195917261312e-05,
"loss": 0.5631,
"step": 9000
},
{
"epoch": 0.81,
"eval_exact_match": 79.92431409649953,
"eval_f1": 87.74870478586679,
"step": 9000
},
{
"epoch": 0.86,
"learning_rate": 2.742570680155361e-05,
"loss": 0.5536,
"step": 9500
},
{
"epoch": 0.86,
"eval_exact_match": 80.05676442762535,
"eval_f1": 88.0179438979553,
"step": 9500
},
{
"epoch": 0.9,
"learning_rate": 2.7290217685845906e-05,
"loss": 0.5747,
"step": 10000
},
{
"epoch": 0.9,
"eval_exact_match": 80.07568590350047,
"eval_f1": 87.78910197864867,
"step": 10000
},
{
"epoch": 0.95,
"learning_rate": 2.71547285701382e-05,
"loss": 0.5318,
"step": 10500
},
{
"epoch": 0.95,
"eval_exact_match": 79.68779564806054,
"eval_f1": 87.94430479946901,
"step": 10500
},
{
"epoch": 0.99,
"learning_rate": 2.7019239454430493e-05,
"loss": 0.5439,
"step": 11000
},
{
"epoch": 0.99,
"eval_exact_match": 80.23651844843897,
"eval_f1": 88.05636922118306,
"step": 11000
},
{
"epoch": 1.04,
"learning_rate": 2.688375033872279e-05,
"loss": 0.5453,
"step": 11500
},
{
"epoch": 1.04,
"eval_exact_match": 80.68117313150425,
"eval_f1": 88.22481668068082,
"step": 11500
},
{
"epoch": 1.08,
"learning_rate": 2.6748261223015084e-05,
"loss": 0.5347,
"step": 12000
},
{
"epoch": 1.08,
"eval_exact_match": 79.89593188268685,
"eval_f1": 87.85315644375596,
"step": 12000
},
{
"epoch": 1.13,
"learning_rate": 2.661277210730738e-05,
"loss": 0.5288,
"step": 12500
},
{
"epoch": 1.13,
"eval_exact_match": 79.94323557237465,
"eval_f1": 87.76766355954204,
"step": 12500
},
{
"epoch": 1.17,
"learning_rate": 2.6477282991599678e-05,
"loss": 0.5257,
"step": 13000
},
{
"epoch": 1.17,
"eval_exact_match": 80.10406811731315,
"eval_f1": 87.93914031745109,
"step": 13000
},
{
"epoch": 1.22,
"learning_rate": 2.6341793875891973e-05,
"loss": 0.538,
"step": 13500
},
{
"epoch": 1.22,
"eval_exact_match": 80.21759697256385,
"eval_f1": 87.89424889725127,
"step": 13500
},
{
"epoch": 1.26,
"learning_rate": 2.6206304760184265e-05,
"loss": 0.5268,
"step": 14000
},
{
"epoch": 1.26,
"eval_exact_match": 79.47019867549669,
"eval_f1": 87.84861972812077,
"step": 14000
},
{
"epoch": 1.31,
"learning_rate": 2.607081564447656e-05,
"loss": 0.5119,
"step": 14500
},
{
"epoch": 1.31,
"eval_exact_match": 79.9526963103122,
"eval_f1": 87.72084013791168,
"step": 14500
},
{
"epoch": 1.35,
"learning_rate": 2.5935326528768855e-05,
"loss": 0.5159,
"step": 15000
},
{
"epoch": 1.35,
"eval_exact_match": 79.96215704824976,
"eval_f1": 87.87919542003893,
"step": 15000
},
{
"epoch": 1.4,
"learning_rate": 2.579983741306115e-05,
"loss": 0.5095,
"step": 15500
},
{
"epoch": 1.4,
"eval_exact_match": 80.07568590350047,
"eval_f1": 87.8874110209929,
"step": 15500
},
{
"epoch": 1.45,
"learning_rate": 2.5664348297353446e-05,
"loss": 0.4962,
"step": 16000
},
{
"epoch": 1.45,
"eval_exact_match": 80.00946073793756,
"eval_f1": 87.95407143363992,
"step": 16000
},
{
"epoch": 1.49,
"learning_rate": 2.552885918164574e-05,
"loss": 0.5054,
"step": 16500
},
{
"epoch": 1.49,
"eval_exact_match": 80.10406811731315,
"eval_f1": 87.97174114111343,
"step": 16500
},
{
"epoch": 1.54,
"learning_rate": 2.539337006593804e-05,
"loss": 0.5078,
"step": 17000
},
{
"epoch": 1.54,
"eval_exact_match": 79.86754966887418,
"eval_f1": 88.0395382350121,
"step": 17000
},
{
"epoch": 1.58,
"learning_rate": 2.5257880950230332e-05,
"loss": 0.5091,
"step": 17500
},
{
"epoch": 1.58,
"eval_exact_match": 79.92431409649953,
"eval_f1": 87.90057496243983,
"step": 17500
},
{
"epoch": 1.63,
"learning_rate": 2.5122391834522627e-05,
"loss": 0.5135,
"step": 18000
},
{
"epoch": 1.63,
"eval_exact_match": 80.43519394512772,
"eval_f1": 88.18635359484611,
"step": 18000
},
{
"epoch": 1.67,
"learning_rate": 2.4986902718814922e-05,
"loss": 0.517,
"step": 18500
},
{
"epoch": 1.67,
"eval_exact_match": 80.7379375591296,
"eval_f1": 88.28976094688572,
"step": 18500
},
{
"epoch": 1.72,
"learning_rate": 2.4851413603107218e-05,
"loss": 0.497,
"step": 19000
},
{
"epoch": 1.72,
"eval_exact_match": 80.26490066225166,
"eval_f1": 87.99449485912551,
"step": 19000
},
{
"epoch": 1.76,
"learning_rate": 2.4715924487399513e-05,
"loss": 0.5164,
"step": 19500
},
{
"epoch": 1.76,
"eval_exact_match": 79.91485335856197,
"eval_f1": 87.90765730927956,
"step": 19500
},
{
"epoch": 1.81,
"learning_rate": 2.4580435371691808e-05,
"loss": 0.4915,
"step": 20000
},
{
"epoch": 1.81,
"eval_exact_match": 80.23651844843897,
"eval_f1": 88.02996373454239,
"step": 20000
},
{
"epoch": 1.85,
"learning_rate": 2.4444946255984103e-05,
"loss": 0.513,
"step": 20500
},
{
"epoch": 1.85,
"eval_exact_match": 80.93661305581836,
"eval_f1": 88.41605518009158,
"step": 20500
},
{
"epoch": 1.9,
"learning_rate": 2.43094571402764e-05,
"loss": 0.4909,
"step": 21000
},
{
"epoch": 1.9,
"eval_exact_match": 79.66887417218543,
"eval_f1": 87.85438349507432,
"step": 21000
},
{
"epoch": 1.94,
"learning_rate": 2.4173968024568694e-05,
"loss": 0.4989,
"step": 21500
},
{
"epoch": 1.94,
"eval_exact_match": 80.87038789025544,
"eval_f1": 88.5022200375229,
"step": 21500
},
{
"epoch": 1.99,
"learning_rate": 2.403847890886099e-05,
"loss": 0.4855,
"step": 22000
},
{
"epoch": 1.99,
"eval_exact_match": 80.99337748344371,
"eval_f1": 88.35925048846019,
"step": 22000
},
{
"epoch": 2.03,
"learning_rate": 2.3902989793153284e-05,
"loss": 0.4805,
"step": 22500
},
{
"epoch": 2.03,
"eval_exact_match": 80.78524124881741,
"eval_f1": 88.34608121611244,
"step": 22500
},
{
"epoch": 2.08,
"learning_rate": 2.376750067744558e-05,
"loss": 0.4738,
"step": 23000
},
{
"epoch": 2.08,
"eval_exact_match": 80.34058656575213,
"eval_f1": 88.28140858780678,
"step": 23000
},
{
"epoch": 2.12,
"learning_rate": 2.363201156173787e-05,
"loss": 0.4879,
"step": 23500
},
{
"epoch": 2.12,
"eval_exact_match": 80.26490066225166,
"eval_f1": 88.30355285051178,
"step": 23500
},
{
"epoch": 2.17,
"learning_rate": 2.349652244603017e-05,
"loss": 0.4964,
"step": 24000
},
{
"epoch": 2.17,
"eval_exact_match": 80.51087984862819,
"eval_f1": 88.17366194566542,
"step": 24000
},
{
"epoch": 2.21,
"learning_rate": 2.3361033330322465e-05,
"loss": 0.5022,
"step": 24500
},
{
"epoch": 2.21,
"eval_exact_match": 80.32166508987702,
"eval_f1": 88.12428636619218,
"step": 24500
},
{
"epoch": 2.26,
"learning_rate": 2.322554421461476e-05,
"loss": 0.4882,
"step": 25000
},
{
"epoch": 2.26,
"eval_exact_match": 80.58656575212866,
"eval_f1": 88.22604931836395,
"step": 25000
},
{
"epoch": 2.3,
"learning_rate": 2.3090055098907056e-05,
"loss": 0.4678,
"step": 25500
},
{
"epoch": 2.3,
"eval_exact_match": 80.58656575212866,
"eval_f1": 88.2634528049855,
"step": 25500
},
{
"epoch": 2.35,
"learning_rate": 2.295456598319935e-05,
"loss": 0.4672,
"step": 26000
},
{
"epoch": 2.35,
"eval_exact_match": 80.71901608325449,
"eval_f1": 88.37809284628581,
"step": 26000
},
{
"epoch": 2.39,
"learning_rate": 2.2819076867491643e-05,
"loss": 0.4686,
"step": 26500
},
{
"epoch": 2.39,
"eval_exact_match": 80.90823084200568,
"eval_f1": 88.23541700044127,
"step": 26500
},
{
"epoch": 2.44,
"learning_rate": 2.268358775178394e-05,
"loss": 0.4744,
"step": 27000
},
{
"epoch": 2.44,
"eval_exact_match": 80.66225165562913,
"eval_f1": 88.46120778742367,
"step": 27000
},
{
"epoch": 2.48,
"learning_rate": 2.2548098636076234e-05,
"loss": 0.4463,
"step": 27500
},
{
"epoch": 2.48,
"eval_exact_match": 80.47303689687796,
"eval_f1": 88.30861830433192,
"step": 27500
},
{
"epoch": 2.53,
"learning_rate": 2.2412609520368532e-05,
"loss": 0.481,
"step": 28000
},
{
"epoch": 2.53,
"eval_exact_match": 80.88930936613056,
"eval_f1": 88.3612801354343,
"step": 28000
},
{
"epoch": 2.57,
"learning_rate": 2.2277120404660828e-05,
"loss": 0.473,
"step": 28500
},
{
"epoch": 2.57,
"eval_exact_match": 80.7379375591296,
"eval_f1": 88.32052631156591,
"step": 28500
},
{
"epoch": 2.62,
"learning_rate": 2.2141631288953123e-05,
"loss": 0.4601,
"step": 29000
},
{
"epoch": 2.62,
"eval_exact_match": 80.39735099337749,
"eval_f1": 88.13224841833522,
"step": 29000
},
{
"epoch": 2.66,
"learning_rate": 2.2006142173245415e-05,
"loss": 0.4606,
"step": 29500
},
{
"epoch": 2.66,
"eval_exact_match": 80.90823084200568,
"eval_f1": 88.62783485215394,
"step": 29500
},
{
"epoch": 2.71,
"learning_rate": 2.187065305753771e-05,
"loss": 0.4609,
"step": 30000
},
{
"epoch": 2.71,
"eval_exact_match": 80.93661305581836,
"eval_f1": 88.59837554013453,
"step": 30000
},
{
"epoch": 2.75,
"learning_rate": 2.1735163941830005e-05,
"loss": 0.4578,
"step": 30500
},
{
"epoch": 2.75,
"eval_exact_match": 80.49195837275307,
"eval_f1": 88.36705697474612,
"step": 30500
},
{
"epoch": 2.8,
"learning_rate": 2.15996748261223e-05,
"loss": 0.4467,
"step": 31000
},
{
"epoch": 2.8,
"eval_exact_match": 80.81362346263009,
"eval_f1": 88.46787337530603,
"step": 31000
},
{
"epoch": 2.85,
"learning_rate": 2.14641857104146e-05,
"loss": 0.4642,
"step": 31500
},
{
"epoch": 2.85,
"eval_exact_match": 80.37842951750237,
"eval_f1": 88.4528665113568,
"step": 31500
},
{
"epoch": 2.89,
"learning_rate": 2.1328696594706894e-05,
"loss": 0.4609,
"step": 32000
},
{
"epoch": 2.89,
"eval_exact_match": 80.65279091769158,
"eval_f1": 88.47959709052238,
"step": 32000
},
{
"epoch": 2.94,
"learning_rate": 2.119320747899919e-05,
"loss": 0.4733,
"step": 32500
},
{
"epoch": 2.94,
"eval_exact_match": 81.01229895931883,
"eval_f1": 88.58771943604643,
"step": 32500
},
{
"epoch": 2.98,
"learning_rate": 2.105771836329148e-05,
"loss": 0.495,
"step": 33000
},
{
"epoch": 2.98,
"eval_exact_match": 80.94607379375591,
"eval_f1": 88.35844191107043,
"step": 33000
},
{
"epoch": 3.03,
"learning_rate": 2.0922229247583777e-05,
"loss": 0.4785,
"step": 33500
},
{
"epoch": 3.03,
"eval_exact_match": 80.95553453169347,
"eval_f1": 88.57282903581543,
"step": 33500
},
{
"epoch": 3.07,
"learning_rate": 2.0786740131876072e-05,
"loss": 0.4418,
"step": 34000
},
{
"epoch": 3.07,
"eval_exact_match": 80.40681173131505,
"eval_f1": 88.26725551094204,
"step": 34000
},
{
"epoch": 3.12,
"learning_rate": 2.0651251016168367e-05,
"loss": 0.4362,
"step": 34500
},
{
"epoch": 3.12,
"eval_exact_match": 80.63386944181646,
"eval_f1": 88.42647597963065,
"step": 34500
},
{
"epoch": 3.16,
"learning_rate": 2.0515761900460666e-05,
"loss": 0.4446,
"step": 35000
},
{
"epoch": 3.16,
"eval_exact_match": 80.56764427625355,
"eval_f1": 88.47064461350817,
"step": 35000
},
{
"epoch": 3.21,
"learning_rate": 2.038027278475296e-05,
"loss": 0.4646,
"step": 35500
},
{
"epoch": 3.21,
"eval_exact_match": 80.84200567644277,
"eval_f1": 88.51508828136045,
"step": 35500
},
{
"epoch": 3.25,
"learning_rate": 2.0244783669045253e-05,
"loss": 0.4523,
"step": 36000
},
{
"epoch": 3.25,
"eval_exact_match": 80.28382213812678,
"eval_f1": 88.19924608800193,
"step": 36000
},
{
"epoch": 3.3,
"learning_rate": 2.010929455333755e-05,
"loss": 0.4425,
"step": 36500
},
{
"epoch": 3.3,
"eval_exact_match": 79.91485335856197,
"eval_f1": 88.13089676308581,
"step": 36500
},
{
"epoch": 3.34,
"learning_rate": 1.9973805437629844e-05,
"loss": 0.4657,
"step": 37000
},
{
"epoch": 3.34,
"eval_exact_match": 80.71901608325449,
"eval_f1": 88.57633683586094,
"step": 37000
},
{
"epoch": 3.39,
"learning_rate": 1.983831632192214e-05,
"loss": 0.4397,
"step": 37500
},
{
"epoch": 3.39,
"eval_exact_match": 80.77578051087986,
"eval_f1": 88.52771953254631,
"step": 37500
},
{
"epoch": 3.43,
"learning_rate": 1.9702827206214434e-05,
"loss": 0.467,
"step": 38000
},
{
"epoch": 3.43,
"eval_exact_match": 81.15421002838221,
"eval_f1": 88.59585460154756,
"step": 38000
},
{
"epoch": 3.48,
"learning_rate": 1.956733809050673e-05,
"loss": 0.4511,
"step": 38500
},
{
"epoch": 3.48,
"eval_exact_match": 80.70009460737937,
"eval_f1": 88.52530706195175,
"step": 38500
},
{
"epoch": 3.52,
"learning_rate": 1.9431848974799025e-05,
"loss": 0.4486,
"step": 39000
},
{
"epoch": 3.52,
"eval_exact_match": 81.22989593188268,
"eval_f1": 88.78752508507468,
"step": 39000
},
{
"epoch": 3.57,
"learning_rate": 1.929635985909132e-05,
"loss": 0.4427,
"step": 39500
},
{
"epoch": 3.57,
"eval_exact_match": 80.70955534531693,
"eval_f1": 88.42915157910551,
"step": 39500
},
{
"epoch": 3.61,
"learning_rate": 1.9160870743383615e-05,
"loss": 0.4409,
"step": 40000
},
{
"epoch": 3.61,
"eval_exact_match": 80.63386944181646,
"eval_f1": 88.40123818446966,
"step": 40000
},
{
"epoch": 3.66,
"learning_rate": 1.902538162767591e-05,
"loss": 0.4315,
"step": 40500
},
{
"epoch": 3.66,
"eval_exact_match": 80.9271523178808,
"eval_f1": 88.61151182416103,
"step": 40500
},
{
"epoch": 3.7,
"learning_rate": 1.8889892511968206e-05,
"loss": 0.4519,
"step": 41000
},
{
"epoch": 3.7,
"eval_exact_match": 81.0406811731315,
"eval_f1": 88.70220098965468,
"step": 41000
},
{
"epoch": 3.75,
"learning_rate": 1.87544033962605e-05,
"loss": 0.4313,
"step": 41500
},
{
"epoch": 3.75,
"eval_exact_match": 80.71901608325449,
"eval_f1": 88.60516127330268,
"step": 41500
},
{
"epoch": 3.79,
"learning_rate": 1.8618914280552793e-05,
"loss": 0.4424,
"step": 42000
},
{
"epoch": 3.79,
"eval_exact_match": 81.12582781456953,
"eval_f1": 88.77989063786927,
"step": 42000
},
{
"epoch": 3.84,
"learning_rate": 1.8483425164845092e-05,
"loss": 0.4356,
"step": 42500
},
{
"epoch": 3.84,
"eval_exact_match": 81.07852412488174,
"eval_f1": 88.73012341031769,
"step": 42500
},
{
"epoch": 3.88,
"learning_rate": 1.8347936049137387e-05,
"loss": 0.4233,
"step": 43000
},
{
"epoch": 3.88,
"eval_exact_match": 81.14474929044465,
"eval_f1": 88.73867212169434,
"step": 43000
},
{
"epoch": 3.93,
"learning_rate": 1.8212446933429682e-05,
"loss": 0.4357,
"step": 43500
},
{
"epoch": 3.93,
"eval_exact_match": 81.07852412488174,
"eval_f1": 88.72753917464263,
"step": 43500
},
{
"epoch": 3.97,
"learning_rate": 1.8076957817721978e-05,
"loss": 0.4355,
"step": 44000
},
{
"epoch": 3.97,
"eval_exact_match": 81.23935666982024,
"eval_f1": 88.77406901625292,
"step": 44000
},
{
"epoch": 4.02,
"learning_rate": 1.7941468702014273e-05,
"loss": 0.4356,
"step": 44500
},
{
"epoch": 4.02,
"eval_exact_match": 81.22043519394512,
"eval_f1": 88.72410871066181,
"step": 44500
},
{
"epoch": 4.06,
"learning_rate": 1.7805979586306565e-05,
"loss": 0.4292,
"step": 45000
},
{
"epoch": 4.06,
"eval_exact_match": 81.07852412488174,
"eval_f1": 88.61817472462816,
"step": 45000
},
{
"epoch": 4.11,
"learning_rate": 1.767049047059886e-05,
"loss": 0.4312,
"step": 45500
},
{
"epoch": 4.11,
"eval_exact_match": 80.879848628193,
"eval_f1": 88.62528062832821,
"step": 45500
},
{
"epoch": 4.15,
"learning_rate": 1.753500135489116e-05,
"loss": 0.436,
"step": 46000
},
{
"epoch": 4.15,
"eval_exact_match": 81.10690633869442,
"eval_f1": 88.5411772547433,
"step": 46000
},
{
"epoch": 4.2,
"learning_rate": 1.7399512239183454e-05,
"loss": 0.4135,
"step": 46500
},
{
"epoch": 4.2,
"eval_exact_match": 80.90823084200568,
"eval_f1": 88.71001298725498,
"step": 46500
},
{
"epoch": 4.25,
"learning_rate": 1.726402312347575e-05,
"loss": 0.4389,
"step": 47000
},
{
"epoch": 4.25,
"eval_exact_match": 81.02175969725639,
"eval_f1": 88.69771041062324,
"step": 47000
},
{
"epoch": 4.29,
"learning_rate": 1.7128534007768044e-05,
"loss": 0.4268,
"step": 47500
},
{
"epoch": 4.29,
"eval_exact_match": 80.97445600756859,
"eval_f1": 88.65635803665829,
"step": 47500
},
{
"epoch": 4.34,
"learning_rate": 1.699304489206034e-05,
"loss": 0.448,
"step": 48000
},
{
"epoch": 4.34,
"eval_exact_match": 81.09744560075686,
"eval_f1": 88.52493417708553,
"step": 48000
},
{
"epoch": 4.38,
"learning_rate": 1.685755577635263e-05,
"loss": 0.4415,
"step": 48500
},
{
"epoch": 4.38,
"eval_exact_match": 81.36234626300852,
"eval_f1": 88.71503956482158,
"step": 48500
},
{
"epoch": 4.43,
"learning_rate": 1.6722066660644927e-05,
"loss": 0.4427,
"step": 49000
},
{
"epoch": 4.43,
"eval_exact_match": 81.59886471144749,
"eval_f1": 88.93333214465008,
"step": 49000
},
{
"epoch": 4.47,
"learning_rate": 1.6586577544937222e-05,
"loss": 0.4349,
"step": 49500
},
{
"epoch": 4.47,
"eval_exact_match": 81.57048249763481,
"eval_f1": 89.0872993027184,
"step": 49500
},
{
"epoch": 4.52,
"learning_rate": 1.645108842922952e-05,
"loss": 0.395,
"step": 50000
},
{
"epoch": 4.52,
"eval_exact_match": 81.3907284768212,
"eval_f1": 88.9695894715022,
"step": 50000
},
{
"epoch": 4.56,
"learning_rate": 1.6315599313521816e-05,
"loss": 0.4067,
"step": 50500
},
{
"epoch": 4.56,
"eval_exact_match": 81.47587511825922,
"eval_f1": 88.83111689573867,
"step": 50500
},
{
"epoch": 4.61,
"learning_rate": 1.618011019781411e-05,
"loss": 0.4432,
"step": 51000
},
{
"epoch": 4.61,
"eval_exact_match": 81.3907284768212,
"eval_f1": 89.02888182388233,
"step": 51000
},
{
"epoch": 4.65,
"learning_rate": 1.6044621082106403e-05,
"loss": 0.4344,
"step": 51500
},
{
"epoch": 4.65,
"eval_exact_match": 81.40964995269631,
"eval_f1": 88.92563712225324,
"step": 51500
},
{
"epoch": 4.7,
"learning_rate": 1.59091319663987e-05,
"loss": 0.4373,
"step": 52000
},
{
"epoch": 4.7,
"eval_exact_match": 81.2488174077578,
"eval_f1": 88.78925066888768,
"step": 52000
},
{
"epoch": 4.74,
"learning_rate": 1.5773642850690994e-05,
"loss": 0.4258,
"step": 52500
},
{
"epoch": 4.74,
"eval_exact_match": 81.69347209082308,
"eval_f1": 89.0722516537511,
"step": 52500
},
{
"epoch": 4.79,
"learning_rate": 1.563815373498329e-05,
"loss": 0.4468,
"step": 53000
},
{
"epoch": 4.79,
"eval_exact_match": 81.47587511825922,
"eval_f1": 88.87689903706278,
"step": 53000
},
{
"epoch": 4.83,
"learning_rate": 1.5502664619275588e-05,
"loss": 0.4062,
"step": 53500
},
{
"epoch": 4.83,
"eval_exact_match": 81.15421002838221,
"eval_f1": 88.63741928726395,
"step": 53500
},
{
"epoch": 4.88,
"learning_rate": 1.5367175503567883e-05,
"loss": 0.4273,
"step": 54000
},
{
"epoch": 4.88,
"eval_exact_match": 81.09744560075686,
"eval_f1": 88.61286118906956,
"step": 54000
},
{
"epoch": 4.92,
"learning_rate": 1.5231686387860175e-05,
"loss": 0.4326,
"step": 54500
},
{
"epoch": 4.92,
"eval_exact_match": 81.51371807000946,
"eval_f1": 88.74438704122335,
"step": 54500
},
{
"epoch": 4.97,
"learning_rate": 1.509619727215247e-05,
"loss": 0.4303,
"step": 55000
},
{
"epoch": 4.97,
"eval_exact_match": 81.57994323557237,
"eval_f1": 88.9163558462002,
"step": 55000
},
{
"epoch": 5.01,
"learning_rate": 1.4960708156444765e-05,
"loss": 0.4171,
"step": 55500
},
{
"epoch": 5.01,
"eval_exact_match": 81.54210028382214,
"eval_f1": 88.96019553881521,
"step": 55500
},
{
"epoch": 5.06,
"learning_rate": 1.482521904073706e-05,
"loss": 0.4427,
"step": 56000
},
{
"epoch": 5.06,
"eval_exact_match": 81.5515610217597,
"eval_f1": 89.03945416392548,
"step": 56000
},
{
"epoch": 5.1,
"learning_rate": 1.4689729925029357e-05,
"loss": 0.4314,
"step": 56500
},
{
"epoch": 5.1,
"eval_exact_match": 81.31504257332072,
"eval_f1": 89.09084382975136,
"step": 56500
},
{
"epoch": 5.15,
"learning_rate": 1.4554240809321651e-05,
"loss": 0.4247,
"step": 57000
},
{
"epoch": 5.15,
"eval_exact_match": 81.33396404919584,
"eval_f1": 89.03107080645903,
"step": 57000
},
{
"epoch": 5.19,
"learning_rate": 1.4418751693613946e-05,
"loss": 0.4304,
"step": 57500
},
{
"epoch": 5.19,
"eval_exact_match": 81.30558183538317,
"eval_f1": 88.77952011430634,
"step": 57500
},
{
"epoch": 5.24,
"learning_rate": 1.4283262577906243e-05,
"loss": 0.4061,
"step": 58000
},
{
"epoch": 5.24,
"eval_exact_match": 81.15421002838221,
"eval_f1": 88.9057099874325,
"step": 58000
},
{
"epoch": 5.28,
"learning_rate": 1.4147773462198537e-05,
"loss": 0.4176,
"step": 58500
},
{
"epoch": 5.28,
"eval_exact_match": 81.09744560075686,
"eval_f1": 88.86774926551541,
"step": 58500
},
{
"epoch": 5.33,
"learning_rate": 1.4012284346490832e-05,
"loss": 0.4287,
"step": 59000
},
{
"epoch": 5.33,
"eval_exact_match": 81.3907284768212,
"eval_f1": 88.89156441024593,
"step": 59000
},
{
"epoch": 5.37,
"learning_rate": 1.3876795230783127e-05,
"loss": 0.4308,
"step": 59500
},
{
"epoch": 5.37,
"eval_exact_match": 81.41911069063387,
"eval_f1": 88.80592923380453,
"step": 59500
},
{
"epoch": 5.42,
"learning_rate": 1.3741306115075423e-05,
"loss": 0.4458,
"step": 60000
},
{
"epoch": 5.42,
"eval_exact_match": 81.16367076631977,
"eval_f1": 88.75077232883521,
"step": 60000
},
{
"epoch": 5.46,
"learning_rate": 1.3605816999367718e-05,
"loss": 0.4166,
"step": 60500
},
{
"epoch": 5.46,
"eval_exact_match": 81.18259224219489,
"eval_f1": 88.88618647663102,
"step": 60500
},
{
"epoch": 5.51,
"learning_rate": 1.3470327883660013e-05,
"loss": 0.4161,
"step": 61000
},
{
"epoch": 5.51,
"eval_exact_match": 81.57048249763481,
"eval_f1": 88.8039726637868,
"step": 61000
},
{
"epoch": 5.56,
"learning_rate": 1.3334838767952307e-05,
"loss": 0.4229,
"step": 61500
},
{
"epoch": 5.56,
"eval_exact_match": 80.68117313150425,
"eval_f1": 88.60323091277711,
"step": 61500
},
{
"epoch": 5.6,
"learning_rate": 1.3199349652244604e-05,
"loss": 0.4162,
"step": 62000
},
{
"epoch": 5.6,
"eval_exact_match": 81.0879848628193,
"eval_f1": 88.76710612537406,
"step": 62000
},
{
"epoch": 5.65,
"learning_rate": 1.3063860536536899e-05,
"loss": 0.436,
"step": 62500
},
{
"epoch": 5.65,
"eval_exact_match": 80.94607379375591,
"eval_f1": 88.62676072578118,
"step": 62500
},
{
"epoch": 5.69,
"learning_rate": 1.2928371420829193e-05,
"loss": 0.4053,
"step": 63000
},
{
"epoch": 5.69,
"eval_exact_match": 81.18259224219489,
"eval_f1": 88.85559696479606,
"step": 63000
},
{
"epoch": 5.74,
"learning_rate": 1.279288230512149e-05,
"loss": 0.4268,
"step": 63500
},
{
"epoch": 5.74,
"eval_exact_match": 81.44749290444655,
"eval_f1": 89.02794352467788,
"step": 63500
},
{
"epoch": 5.78,
"learning_rate": 1.2657393189413785e-05,
"loss": 0.4387,
"step": 64000
},
{
"epoch": 5.78,
"eval_exact_match": 81.57994323557237,
"eval_f1": 88.94398669273697,
"step": 64000
},
{
"epoch": 5.83,
"learning_rate": 1.2521904073706078e-05,
"loss": 0.416,
"step": 64500
},
{
"epoch": 5.83,
"eval_exact_match": 81.5515610217597,
"eval_f1": 88.99506075304372,
"step": 64500
},
{
"epoch": 5.87,
"learning_rate": 1.2386414957998374e-05,
"loss": 0.4172,
"step": 65000
},
{
"epoch": 5.87,
"eval_exact_match": 81.22989593188268,
"eval_f1": 88.85658818631349,
"step": 65000
},
{
"epoch": 5.92,
"learning_rate": 1.225092584229067e-05,
"loss": 0.4358,
"step": 65500
},
{
"epoch": 5.92,
"eval_exact_match": 81.68401135288552,
"eval_f1": 89.0622553654262,
"step": 65500
},
{
"epoch": 5.96,
"learning_rate": 1.2115436726582964e-05,
"loss": 0.4152,
"step": 66000
},
{
"epoch": 5.96,
"eval_exact_match": 81.20151371807,
"eval_f1": 88.97429178716622,
"step": 66000
},
{
"epoch": 6.01,
"learning_rate": 1.197994761087526e-05,
"loss": 0.4305,
"step": 66500
},
{
"epoch": 6.01,
"eval_exact_match": 81.18259224219489,
"eval_f1": 88.77695235966252,
"step": 66500
},
{
"epoch": 6.05,
"learning_rate": 1.1844458495167555e-05,
"loss": 0.4281,
"step": 67000
},
{
"epoch": 6.05,
"eval_exact_match": 81.33396404919584,
"eval_f1": 88.78860377010713,
"step": 67000
},
{
"epoch": 6.1,
"learning_rate": 1.170896937945985e-05,
"loss": 0.4354,
"step": 67500
},
{
"epoch": 6.1,
"eval_exact_match": 80.91769157994324,
"eval_f1": 88.40213267079321,
"step": 67500
},
{
"epoch": 6.14,
"learning_rate": 1.1573480263752145e-05,
"loss": 0.4538,
"step": 68000
},
{
"epoch": 6.14,
"eval_exact_match": 81.28666035950805,
"eval_f1": 88.74956515546637,
"step": 68000
},
{
"epoch": 6.19,
"learning_rate": 1.143799114804444e-05,
"loss": 0.4211,
"step": 68500
},
{
"epoch": 6.19,
"eval_exact_match": 81.0879848628193,
"eval_f1": 88.67833249431447,
"step": 68500
},
{
"epoch": 6.23,
"learning_rate": 1.1302502032336736e-05,
"loss": 0.4222,
"step": 69000
},
{
"epoch": 6.23,
"eval_exact_match": 81.16367076631977,
"eval_f1": 88.62960238697383,
"step": 69000
},
{
"epoch": 6.28,
"learning_rate": 1.1167012916629031e-05,
"loss": 0.4196,
"step": 69500
},
{
"epoch": 6.28,
"eval_exact_match": 81.35288552507096,
"eval_f1": 88.85744103369602,
"step": 69500
},
{
"epoch": 6.32,
"learning_rate": 1.1031523800921326e-05,
"loss": 0.4201,
"step": 70000
},
{
"epoch": 6.32,
"eval_exact_match": 81.19205298013244,
"eval_f1": 88.59874172307164,
"step": 70000
},
{
"epoch": 6.37,
"learning_rate": 1.0896034685213622e-05,
"loss": 0.4055,
"step": 70500
},
{
"epoch": 6.37,
"eval_exact_match": 80.68117313150425,
"eval_f1": 88.45994219213983,
"step": 70500
},
{
"epoch": 6.41,
"learning_rate": 1.0760545569505917e-05,
"loss": 0.4094,
"step": 71000
},
{
"epoch": 6.41,
"eval_exact_match": 81.76915799432356,
"eval_f1": 89.01460171321759,
"step": 71000
},
{
"epoch": 6.46,
"learning_rate": 1.0625056453798212e-05,
"loss": 0.3936,
"step": 71500
},
{
"epoch": 6.46,
"eval_exact_match": 81.42857142857143,
"eval_f1": 88.92500663268179,
"step": 71500
},
{
"epoch": 6.5,
"learning_rate": 1.0489567338090507e-05,
"loss": 0.4185,
"step": 72000
},
{
"epoch": 6.5,
"eval_exact_match": 81.60832544938505,
"eval_f1": 89.06733554933447,
"step": 72000
},
{
"epoch": 6.55,
"learning_rate": 1.0354078222382801e-05,
"loss": 0.4218,
"step": 72500
},
{
"epoch": 6.55,
"eval_exact_match": 80.83254493850521,
"eval_f1": 88.84006035617828,
"step": 72500
},
{
"epoch": 6.59,
"learning_rate": 1.0218589106675098e-05,
"loss": 0.3984,
"step": 73000
},
{
"epoch": 6.59,
"eval_exact_match": 81.9678334910123,
"eval_f1": 89.27324196045087,
"step": 73000
},
{
"epoch": 6.64,
"learning_rate": 1.0083099990967393e-05,
"loss": 0.4313,
"step": 73500
},
{
"epoch": 6.64,
"eval_exact_match": 81.90160832544939,
"eval_f1": 89.28118310884184,
"step": 73500
}
],
"max_steps": 110710,
"num_train_epochs": 10,
"total_flos": 1.4425835526144e+16,
"trial_name": null,
"trial_params": null
}