|
{ |
|
"best_metric": 89.28118310884184, |
|
"best_model_checkpoint": "output/lat-minilm-layerdrop0.2/checkpoint-73500", |
|
"epoch": 6.638966669677536, |
|
"global_step": 73500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9864510884292296e-05, |
|
"loss": 1.2398, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_exact_match": 79.31882686849575, |
|
"eval_f1": 87.06894613336114, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.972902176858459e-05, |
|
"loss": 0.8042, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_exact_match": 78.6565752128666, |
|
"eval_f1": 86.87585899730593, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9593532652876887e-05, |
|
"loss": 0.7486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_exact_match": 79.20529801324503, |
|
"eval_f1": 87.30527211803158, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9458043537169182e-05, |
|
"loss": 0.7114, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_exact_match": 79.67833491012298, |
|
"eval_f1": 87.49088042810736, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.9322554421461477e-05, |
|
"loss": 0.7036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_exact_match": 79.3282876064333, |
|
"eval_f1": 87.37950296154845, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.9187065305753772e-05, |
|
"loss": 0.6913, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 79.17691579943235, |
|
"eval_f1": 87.41529490669386, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.9051576190046068e-05, |
|
"loss": 0.6968, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_exact_match": 79.67833491012298, |
|
"eval_f1": 87.43644498439642, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.8916087074338363e-05, |
|
"loss": 0.6308, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_exact_match": 79.77294228949859, |
|
"eval_f1": 87.71831118238406, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.8780597958630658e-05, |
|
"loss": 0.6146, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_exact_match": 79.63103122043519, |
|
"eval_f1": 87.53578264477454, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.864510884292295e-05, |
|
"loss": 0.6345, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_exact_match": 79.50804162724693, |
|
"eval_f1": 87.48678120571361, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.850961972721525e-05, |
|
"loss": 0.6153, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_exact_match": 79.2336802270577, |
|
"eval_f1": 87.30066036186288, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.8374130611507544e-05, |
|
"loss": 0.5926, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_exact_match": 79.82970671712394, |
|
"eval_f1": 87.5917752792044, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.823864149579984e-05, |
|
"loss": 0.6109, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_exact_match": 79.36613055818354, |
|
"eval_f1": 87.43013058208933, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.8103152380092135e-05, |
|
"loss": 0.5848, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_exact_match": 80.28382213812678, |
|
"eval_f1": 88.06441604943116, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.796766326438443e-05, |
|
"loss": 0.5942, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_exact_match": 80.27436140018922, |
|
"eval_f1": 88.00500107620664, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.783217414867672e-05, |
|
"loss": 0.6163, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_exact_match": 80.29328287606434, |
|
"eval_f1": 88.10467448485997, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7696685032969017e-05, |
|
"loss": 0.5844, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 80.35004730368969, |
|
"eval_f1": 88.01050128741105, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.7561195917261312e-05, |
|
"loss": 0.5631, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_exact_match": 79.92431409649953, |
|
"eval_f1": 87.74870478586679, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.742570680155361e-05, |
|
"loss": 0.5536, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_exact_match": 80.05676442762535, |
|
"eval_f1": 88.0179438979553, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7290217685845906e-05, |
|
"loss": 0.5747, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_exact_match": 80.07568590350047, |
|
"eval_f1": 87.78910197864867, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.71547285701382e-05, |
|
"loss": 0.5318, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_exact_match": 79.68779564806054, |
|
"eval_f1": 87.94430479946901, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7019239454430493e-05, |
|
"loss": 0.5439, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_exact_match": 80.23651844843897, |
|
"eval_f1": 88.05636922118306, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.688375033872279e-05, |
|
"loss": 0.5453, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_exact_match": 80.68117313150425, |
|
"eval_f1": 88.22481668068082, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.6748261223015084e-05, |
|
"loss": 0.5347, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_exact_match": 79.89593188268685, |
|
"eval_f1": 87.85315644375596, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.661277210730738e-05, |
|
"loss": 0.5288, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_exact_match": 79.94323557237465, |
|
"eval_f1": 87.76766355954204, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.6477282991599678e-05, |
|
"loss": 0.5257, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_exact_match": 80.10406811731315, |
|
"eval_f1": 87.93914031745109, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.6341793875891973e-05, |
|
"loss": 0.538, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_exact_match": 80.21759697256385, |
|
"eval_f1": 87.89424889725127, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.6206304760184265e-05, |
|
"loss": 0.5268, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 79.47019867549669, |
|
"eval_f1": 87.84861972812077, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.607081564447656e-05, |
|
"loss": 0.5119, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_exact_match": 79.9526963103122, |
|
"eval_f1": 87.72084013791168, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.5935326528768855e-05, |
|
"loss": 0.5159, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_exact_match": 79.96215704824976, |
|
"eval_f1": 87.87919542003893, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.579983741306115e-05, |
|
"loss": 0.5095, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_exact_match": 80.07568590350047, |
|
"eval_f1": 87.8874110209929, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5664348297353446e-05, |
|
"loss": 0.4962, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_exact_match": 80.00946073793756, |
|
"eval_f1": 87.95407143363992, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.552885918164574e-05, |
|
"loss": 0.5054, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_exact_match": 80.10406811731315, |
|
"eval_f1": 87.97174114111343, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.539337006593804e-05, |
|
"loss": 0.5078, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_exact_match": 79.86754966887418, |
|
"eval_f1": 88.0395382350121, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.5257880950230332e-05, |
|
"loss": 0.5091, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_exact_match": 79.92431409649953, |
|
"eval_f1": 87.90057496243983, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5122391834522627e-05, |
|
"loss": 0.5135, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_exact_match": 80.43519394512772, |
|
"eval_f1": 88.18635359484611, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4986902718814922e-05, |
|
"loss": 0.517, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_exact_match": 80.7379375591296, |
|
"eval_f1": 88.28976094688572, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.4851413603107218e-05, |
|
"loss": 0.497, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_exact_match": 80.26490066225166, |
|
"eval_f1": 87.99449485912551, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4715924487399513e-05, |
|
"loss": 0.5164, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_exact_match": 79.91485335856197, |
|
"eval_f1": 87.90765730927956, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.4580435371691808e-05, |
|
"loss": 0.4915, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 80.23651844843897, |
|
"eval_f1": 88.02996373454239, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.4444946255984103e-05, |
|
"loss": 0.513, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_exact_match": 80.93661305581836, |
|
"eval_f1": 88.41605518009158, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.43094571402764e-05, |
|
"loss": 0.4909, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_exact_match": 79.66887417218543, |
|
"eval_f1": 87.85438349507432, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.4173968024568694e-05, |
|
"loss": 0.4989, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_exact_match": 80.87038789025544, |
|
"eval_f1": 88.5022200375229, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.403847890886099e-05, |
|
"loss": 0.4855, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_exact_match": 80.99337748344371, |
|
"eval_f1": 88.35925048846019, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.3902989793153284e-05, |
|
"loss": 0.4805, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_exact_match": 80.78524124881741, |
|
"eval_f1": 88.34608121611244, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.376750067744558e-05, |
|
"loss": 0.4738, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_exact_match": 80.34058656575213, |
|
"eval_f1": 88.28140858780678, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.363201156173787e-05, |
|
"loss": 0.4879, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_exact_match": 80.26490066225166, |
|
"eval_f1": 88.30355285051178, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.349652244603017e-05, |
|
"loss": 0.4964, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 80.51087984862819, |
|
"eval_f1": 88.17366194566542, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.3361033330322465e-05, |
|
"loss": 0.5022, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_exact_match": 80.32166508987702, |
|
"eval_f1": 88.12428636619218, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.322554421461476e-05, |
|
"loss": 0.4882, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_exact_match": 80.58656575212866, |
|
"eval_f1": 88.22604931836395, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3090055098907056e-05, |
|
"loss": 0.4678, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_exact_match": 80.58656575212866, |
|
"eval_f1": 88.2634528049855, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.295456598319935e-05, |
|
"loss": 0.4672, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_exact_match": 80.71901608325449, |
|
"eval_f1": 88.37809284628581, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.2819076867491643e-05, |
|
"loss": 0.4686, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_exact_match": 80.90823084200568, |
|
"eval_f1": 88.23541700044127, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.268358775178394e-05, |
|
"loss": 0.4744, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_exact_match": 80.66225165562913, |
|
"eval_f1": 88.46120778742367, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.2548098636076234e-05, |
|
"loss": 0.4463, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_exact_match": 80.47303689687796, |
|
"eval_f1": 88.30861830433192, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.2412609520368532e-05, |
|
"loss": 0.481, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_exact_match": 80.88930936613056, |
|
"eval_f1": 88.3612801354343, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.2277120404660828e-05, |
|
"loss": 0.473, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 80.7379375591296, |
|
"eval_f1": 88.32052631156591, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.2141631288953123e-05, |
|
"loss": 0.4601, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_exact_match": 80.39735099337749, |
|
"eval_f1": 88.13224841833522, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2006142173245415e-05, |
|
"loss": 0.4606, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_exact_match": 80.90823084200568, |
|
"eval_f1": 88.62783485215394, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.187065305753771e-05, |
|
"loss": 0.4609, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_exact_match": 80.93661305581836, |
|
"eval_f1": 88.59837554013453, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.1735163941830005e-05, |
|
"loss": 0.4578, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_exact_match": 80.49195837275307, |
|
"eval_f1": 88.36705697474612, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.15996748261223e-05, |
|
"loss": 0.4467, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_exact_match": 80.81362346263009, |
|
"eval_f1": 88.46787337530603, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.14641857104146e-05, |
|
"loss": 0.4642, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_exact_match": 80.37842951750237, |
|
"eval_f1": 88.4528665113568, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1328696594706894e-05, |
|
"loss": 0.4609, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_exact_match": 80.65279091769158, |
|
"eval_f1": 88.47959709052238, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.119320747899919e-05, |
|
"loss": 0.4733, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_exact_match": 81.01229895931883, |
|
"eval_f1": 88.58771943604643, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.105771836329148e-05, |
|
"loss": 0.495, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_exact_match": 80.94607379375591, |
|
"eval_f1": 88.35844191107043, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.0922229247583777e-05, |
|
"loss": 0.4785, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_exact_match": 80.95553453169347, |
|
"eval_f1": 88.57282903581543, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.0786740131876072e-05, |
|
"loss": 0.4418, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_exact_match": 80.40681173131505, |
|
"eval_f1": 88.26725551094204, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.0651251016168367e-05, |
|
"loss": 0.4362, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_exact_match": 80.63386944181646, |
|
"eval_f1": 88.42647597963065, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.0515761900460666e-05, |
|
"loss": 0.4446, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_exact_match": 80.56764427625355, |
|
"eval_f1": 88.47064461350817, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.038027278475296e-05, |
|
"loss": 0.4646, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_exact_match": 80.84200567644277, |
|
"eval_f1": 88.51508828136045, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 2.0244783669045253e-05, |
|
"loss": 0.4523, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_exact_match": 80.28382213812678, |
|
"eval_f1": 88.19924608800193, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.010929455333755e-05, |
|
"loss": 0.4425, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_exact_match": 79.91485335856197, |
|
"eval_f1": 88.13089676308581, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.9973805437629844e-05, |
|
"loss": 0.4657, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_exact_match": 80.71901608325449, |
|
"eval_f1": 88.57633683586094, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.983831632192214e-05, |
|
"loss": 0.4397, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_exact_match": 80.77578051087986, |
|
"eval_f1": 88.52771953254631, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.9702827206214434e-05, |
|
"loss": 0.467, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_exact_match": 81.15421002838221, |
|
"eval_f1": 88.59585460154756, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.956733809050673e-05, |
|
"loss": 0.4511, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_exact_match": 80.70009460737937, |
|
"eval_f1": 88.52530706195175, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.9431848974799025e-05, |
|
"loss": 0.4486, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_exact_match": 81.22989593188268, |
|
"eval_f1": 88.78752508507468, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.929635985909132e-05, |
|
"loss": 0.4427, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_exact_match": 80.70955534531693, |
|
"eval_f1": 88.42915157910551, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.9160870743383615e-05, |
|
"loss": 0.4409, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_exact_match": 80.63386944181646, |
|
"eval_f1": 88.40123818446966, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.902538162767591e-05, |
|
"loss": 0.4315, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_exact_match": 80.9271523178808, |
|
"eval_f1": 88.61151182416103, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.8889892511968206e-05, |
|
"loss": 0.4519, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_exact_match": 81.0406811731315, |
|
"eval_f1": 88.70220098965468, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.87544033962605e-05, |
|
"loss": 0.4313, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_exact_match": 80.71901608325449, |
|
"eval_f1": 88.60516127330268, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.8618914280552793e-05, |
|
"loss": 0.4424, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_exact_match": 81.12582781456953, |
|
"eval_f1": 88.77989063786927, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.8483425164845092e-05, |
|
"loss": 0.4356, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_exact_match": 81.07852412488174, |
|
"eval_f1": 88.73012341031769, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.8347936049137387e-05, |
|
"loss": 0.4233, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_exact_match": 81.14474929044465, |
|
"eval_f1": 88.73867212169434, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.8212446933429682e-05, |
|
"loss": 0.4357, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_exact_match": 81.07852412488174, |
|
"eval_f1": 88.72753917464263, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.8076957817721978e-05, |
|
"loss": 0.4355, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_exact_match": 81.23935666982024, |
|
"eval_f1": 88.77406901625292, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.7941468702014273e-05, |
|
"loss": 0.4356, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_exact_match": 81.22043519394512, |
|
"eval_f1": 88.72410871066181, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.7805979586306565e-05, |
|
"loss": 0.4292, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_exact_match": 81.07852412488174, |
|
"eval_f1": 88.61817472462816, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.767049047059886e-05, |
|
"loss": 0.4312, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_exact_match": 80.879848628193, |
|
"eval_f1": 88.62528062832821, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.753500135489116e-05, |
|
"loss": 0.436, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_exact_match": 81.10690633869442, |
|
"eval_f1": 88.5411772547433, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.7399512239183454e-05, |
|
"loss": 0.4135, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_exact_match": 80.90823084200568, |
|
"eval_f1": 88.71001298725498, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.726402312347575e-05, |
|
"loss": 0.4389, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_exact_match": 81.02175969725639, |
|
"eval_f1": 88.69771041062324, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.7128534007768044e-05, |
|
"loss": 0.4268, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_exact_match": 80.97445600756859, |
|
"eval_f1": 88.65635803665829, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.699304489206034e-05, |
|
"loss": 0.448, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"eval_exact_match": 81.09744560075686, |
|
"eval_f1": 88.52493417708553, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.685755577635263e-05, |
|
"loss": 0.4415, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_exact_match": 81.36234626300852, |
|
"eval_f1": 88.71503956482158, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.6722066660644927e-05, |
|
"loss": 0.4427, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_exact_match": 81.59886471144749, |
|
"eval_f1": 88.93333214465008, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.6586577544937222e-05, |
|
"loss": 0.4349, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_exact_match": 81.57048249763481, |
|
"eval_f1": 89.0872993027184, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.645108842922952e-05, |
|
"loss": 0.395, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_exact_match": 81.3907284768212, |
|
"eval_f1": 88.9695894715022, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.6315599313521816e-05, |
|
"loss": 0.4067, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_exact_match": 81.47587511825922, |
|
"eval_f1": 88.83111689573867, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.618011019781411e-05, |
|
"loss": 0.4432, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_exact_match": 81.3907284768212, |
|
"eval_f1": 89.02888182388233, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.6044621082106403e-05, |
|
"loss": 0.4344, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_exact_match": 81.40964995269631, |
|
"eval_f1": 88.92563712225324, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.59091319663987e-05, |
|
"loss": 0.4373, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_exact_match": 81.2488174077578, |
|
"eval_f1": 88.78925066888768, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.5773642850690994e-05, |
|
"loss": 0.4258, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_exact_match": 81.69347209082308, |
|
"eval_f1": 89.0722516537511, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 1.563815373498329e-05, |
|
"loss": 0.4468, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_exact_match": 81.47587511825922, |
|
"eval_f1": 88.87689903706278, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.5502664619275588e-05, |
|
"loss": 0.4062, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_exact_match": 81.15421002838221, |
|
"eval_f1": 88.63741928726395, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.5367175503567883e-05, |
|
"loss": 0.4273, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_exact_match": 81.09744560075686, |
|
"eval_f1": 88.61286118906956, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.5231686387860175e-05, |
|
"loss": 0.4326, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_exact_match": 81.51371807000946, |
|
"eval_f1": 88.74438704122335, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.509619727215247e-05, |
|
"loss": 0.4303, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_exact_match": 81.57994323557237, |
|
"eval_f1": 88.9163558462002, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 1.4960708156444765e-05, |
|
"loss": 0.4171, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_exact_match": 81.54210028382214, |
|
"eval_f1": 88.96019553881521, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 1.482521904073706e-05, |
|
"loss": 0.4427, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_exact_match": 81.5515610217597, |
|
"eval_f1": 89.03945416392548, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 1.4689729925029357e-05, |
|
"loss": 0.4314, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_exact_match": 81.31504257332072, |
|
"eval_f1": 89.09084382975136, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 1.4554240809321651e-05, |
|
"loss": 0.4247, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_exact_match": 81.33396404919584, |
|
"eval_f1": 89.03107080645903, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.4418751693613946e-05, |
|
"loss": 0.4304, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_exact_match": 81.30558183538317, |
|
"eval_f1": 88.77952011430634, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 1.4283262577906243e-05, |
|
"loss": 0.4061, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_exact_match": 81.15421002838221, |
|
"eval_f1": 88.9057099874325, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 1.4147773462198537e-05, |
|
"loss": 0.4176, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_exact_match": 81.09744560075686, |
|
"eval_f1": 88.86774926551541, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.4012284346490832e-05, |
|
"loss": 0.4287, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_exact_match": 81.3907284768212, |
|
"eval_f1": 88.89156441024593, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 1.3876795230783127e-05, |
|
"loss": 0.4308, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"eval_exact_match": 81.41911069063387, |
|
"eval_f1": 88.80592923380453, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 1.3741306115075423e-05, |
|
"loss": 0.4458, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_exact_match": 81.16367076631977, |
|
"eval_f1": 88.75077232883521, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.3605816999367718e-05, |
|
"loss": 0.4166, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_exact_match": 81.18259224219489, |
|
"eval_f1": 88.88618647663102, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 1.3470327883660013e-05, |
|
"loss": 0.4161, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"eval_exact_match": 81.57048249763481, |
|
"eval_f1": 88.8039726637868, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 1.3334838767952307e-05, |
|
"loss": 0.4229, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_exact_match": 80.68117313150425, |
|
"eval_f1": 88.60323091277711, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 1.3199349652244604e-05, |
|
"loss": 0.4162, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_exact_match": 81.0879848628193, |
|
"eval_f1": 88.76710612537406, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1.3063860536536899e-05, |
|
"loss": 0.436, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_exact_match": 80.94607379375591, |
|
"eval_f1": 88.62676072578118, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 1.2928371420829193e-05, |
|
"loss": 0.4053, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_exact_match": 81.18259224219489, |
|
"eval_f1": 88.85559696479606, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 1.279288230512149e-05, |
|
"loss": 0.4268, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_exact_match": 81.44749290444655, |
|
"eval_f1": 89.02794352467788, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1.2657393189413785e-05, |
|
"loss": 0.4387, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_exact_match": 81.57994323557237, |
|
"eval_f1": 88.94398669273697, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 1.2521904073706078e-05, |
|
"loss": 0.416, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_exact_match": 81.5515610217597, |
|
"eval_f1": 88.99506075304372, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 1.2386414957998374e-05, |
|
"loss": 0.4172, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_exact_match": 81.22989593188268, |
|
"eval_f1": 88.85658818631349, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 1.225092584229067e-05, |
|
"loss": 0.4358, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"eval_exact_match": 81.68401135288552, |
|
"eval_f1": 89.0622553654262, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 1.2115436726582964e-05, |
|
"loss": 0.4152, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_exact_match": 81.20151371807, |
|
"eval_f1": 88.97429178716622, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.197994761087526e-05, |
|
"loss": 0.4305, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_exact_match": 81.18259224219489, |
|
"eval_f1": 88.77695235966252, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.1844458495167555e-05, |
|
"loss": 0.4281, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_exact_match": 81.33396404919584, |
|
"eval_f1": 88.78860377010713, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.170896937945985e-05, |
|
"loss": 0.4354, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_exact_match": 80.91769157994324, |
|
"eval_f1": 88.40213267079321, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 1.1573480263752145e-05, |
|
"loss": 0.4538, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"eval_exact_match": 81.28666035950805, |
|
"eval_f1": 88.74956515546637, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.143799114804444e-05, |
|
"loss": 0.4211, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_exact_match": 81.0879848628193, |
|
"eval_f1": 88.67833249431447, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.1302502032336736e-05, |
|
"loss": 0.4222, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_exact_match": 81.16367076631977, |
|
"eval_f1": 88.62960238697383, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 1.1167012916629031e-05, |
|
"loss": 0.4196, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_exact_match": 81.35288552507096, |
|
"eval_f1": 88.85744103369602, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.1031523800921326e-05, |
|
"loss": 0.4201, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_exact_match": 81.19205298013244, |
|
"eval_f1": 88.59874172307164, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 1.0896034685213622e-05, |
|
"loss": 0.4055, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_exact_match": 80.68117313150425, |
|
"eval_f1": 88.45994219213983, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.0760545569505917e-05, |
|
"loss": 0.4094, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_exact_match": 81.76915799432356, |
|
"eval_f1": 89.01460171321759, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.0625056453798212e-05, |
|
"loss": 0.3936, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_exact_match": 81.42857142857143, |
|
"eval_f1": 88.92500663268179, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 1.0489567338090507e-05, |
|
"loss": 0.4185, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_exact_match": 81.60832544938505, |
|
"eval_f1": 89.06733554933447, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.0354078222382801e-05, |
|
"loss": 0.4218, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_exact_match": 80.83254493850521, |
|
"eval_f1": 88.84006035617828, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.0218589106675098e-05, |
|
"loss": 0.3984, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_exact_match": 81.9678334910123, |
|
"eval_f1": 89.27324196045087, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 1.0083099990967393e-05, |
|
"loss": 0.4313, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_exact_match": 81.90160832544939, |
|
"eval_f1": 89.28118310884184, |
|
"step": 73500 |
|
} |
|
], |
|
"max_steps": 110710, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.4425835526144e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|