|
{ |
|
"best_metric": 0.7951020408163265, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-masakhaner-pcm/checkpoint-7000", |
|
"epoch": 119.40298507462687, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy_score": 0.8950079239302694, |
|
"eval_f1": 0.4056413700470114, |
|
"eval_loss": 0.31147781014442444, |
|
"eval_precision": 0.3420158550396376, |
|
"eval_recall": 0.49834983498349833, |
|
"eval_runtime": 2.4074, |
|
"eval_samples_per_second": 127.107, |
|
"eval_steps_per_second": 16.2, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_accuracy_score": 0.9330427892234548, |
|
"eval_f1": 0.541019955654102, |
|
"eval_loss": 0.22155305743217468, |
|
"eval_precision": 0.4899598393574297, |
|
"eval_recall": 0.6039603960396039, |
|
"eval_runtime": 2.4071, |
|
"eval_samples_per_second": 127.125, |
|
"eval_steps_per_second": 16.202, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.3727, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_accuracy_score": 0.9498151082937136, |
|
"eval_f1": 0.6552238805970149, |
|
"eval_loss": 0.18555797636508942, |
|
"eval_precision": 0.5980926430517711, |
|
"eval_recall": 0.7244224422442245, |
|
"eval_runtime": 2.4132, |
|
"eval_samples_per_second": 126.802, |
|
"eval_steps_per_second": 16.161, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"eval_accuracy_score": 0.95932382461701, |
|
"eval_f1": 0.7168627450980393, |
|
"eval_loss": 0.1749705672264099, |
|
"eval_precision": 0.6831091180866966, |
|
"eval_recall": 0.7541254125412541, |
|
"eval_runtime": 2.408, |
|
"eval_samples_per_second": 127.076, |
|
"eval_steps_per_second": 16.196, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_accuracy_score": 0.9578711040676176, |
|
"eval_f1": 0.7334360554699538, |
|
"eval_loss": 0.1944570541381836, |
|
"eval_precision": 0.6878612716763006, |
|
"eval_recall": 0.7854785478547854, |
|
"eval_runtime": 2.4095, |
|
"eval_samples_per_second": 127.0, |
|
"eval_steps_per_second": 16.186, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"eval_accuracy_score": 0.9647385103011094, |
|
"eval_f1": 0.7670364500792393, |
|
"eval_loss": 0.18250040709972382, |
|
"eval_precision": 0.7378048780487805, |
|
"eval_recall": 0.7986798679867987, |
|
"eval_runtime": 2.4111, |
|
"eval_samples_per_second": 126.911, |
|
"eval_steps_per_second": 16.175, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy_score": 0.9630216587427364, |
|
"eval_f1": 0.7384615384615385, |
|
"eval_loss": 0.19533774256706238, |
|
"eval_precision": 0.724960254372019, |
|
"eval_recall": 0.7524752475247525, |
|
"eval_runtime": 2.4014, |
|
"eval_samples_per_second": 127.425, |
|
"eval_steps_per_second": 16.24, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0105, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"eval_accuracy_score": 0.9644743792921289, |
|
"eval_f1": 0.7389937106918238, |
|
"eval_loss": 0.19444848597049713, |
|
"eval_precision": 0.7057057057057057, |
|
"eval_recall": 0.7755775577557755, |
|
"eval_runtime": 2.4134, |
|
"eval_samples_per_second": 126.79, |
|
"eval_steps_per_second": 16.16, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"eval_accuracy_score": 0.963946117274168, |
|
"eval_f1": 0.7565424266455195, |
|
"eval_loss": 0.211569145321846, |
|
"eval_precision": 0.7282442748091603, |
|
"eval_recall": 0.7871287128712872, |
|
"eval_runtime": 2.4076, |
|
"eval_samples_per_second": 127.097, |
|
"eval_steps_per_second": 16.199, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0055, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"eval_accuracy_score": 0.9611727416798732, |
|
"eval_f1": 0.7248979591836734, |
|
"eval_loss": 0.23255787789821625, |
|
"eval_precision": 0.7172859450726979, |
|
"eval_recall": 0.7326732673267327, |
|
"eval_runtime": 2.4063, |
|
"eval_samples_per_second": 127.167, |
|
"eval_steps_per_second": 16.208, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.84, |
|
"eval_accuracy_score": 0.9650026413100898, |
|
"eval_f1": 0.7594339622641509, |
|
"eval_loss": 0.21984447538852692, |
|
"eval_precision": 0.7252252252252253, |
|
"eval_recall": 0.7970297029702971, |
|
"eval_runtime": 2.4107, |
|
"eval_samples_per_second": 126.936, |
|
"eval_steps_per_second": 16.178, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.82, |
|
"eval_accuracy_score": 0.963946117274168, |
|
"eval_f1": 0.7598736176935229, |
|
"eval_loss": 0.2293044477701187, |
|
"eval_precision": 0.7287878787878788, |
|
"eval_recall": 0.7937293729372937, |
|
"eval_runtime": 2.4114, |
|
"eval_samples_per_second": 126.899, |
|
"eval_steps_per_second": 16.173, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0038, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 38.81, |
|
"eval_accuracy_score": 0.9643423137876387, |
|
"eval_f1": 0.7497975708502024, |
|
"eval_loss": 0.23353905975818634, |
|
"eval_precision": 0.7360890302066773, |
|
"eval_recall": 0.764026402640264, |
|
"eval_runtime": 2.4071, |
|
"eval_samples_per_second": 127.125, |
|
"eval_steps_per_second": 16.202, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 41.79, |
|
"eval_accuracy_score": 0.9655309033280507, |
|
"eval_f1": 0.7648, |
|
"eval_loss": 0.224817156791687, |
|
"eval_precision": 0.7422360248447205, |
|
"eval_recall": 0.7887788778877888, |
|
"eval_runtime": 2.4046, |
|
"eval_samples_per_second": 127.255, |
|
"eval_steps_per_second": 16.219, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0032, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"eval_accuracy_score": 0.95932382461701, |
|
"eval_f1": 0.7264076130055511, |
|
"eval_loss": 0.26162534952163696, |
|
"eval_precision": 0.6992366412213741, |
|
"eval_recall": 0.7557755775577558, |
|
"eval_runtime": 2.3994, |
|
"eval_samples_per_second": 127.53, |
|
"eval_steps_per_second": 16.254, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 47.76, |
|
"eval_accuracy_score": 0.9638140517696777, |
|
"eval_f1": 0.7374392220421394, |
|
"eval_loss": 0.2429530769586563, |
|
"eval_precision": 0.7245222929936306, |
|
"eval_recall": 0.7508250825082509, |
|
"eval_runtime": 2.4056, |
|
"eval_samples_per_second": 127.201, |
|
"eval_steps_per_second": 16.212, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 50.75, |
|
"eval_accuracy_score": 0.9646064447966192, |
|
"eval_f1": 0.7631578947368421, |
|
"eval_loss": 0.2461545169353485, |
|
"eval_precision": 0.760655737704918, |
|
"eval_recall": 0.7656765676567657, |
|
"eval_runtime": 2.4169, |
|
"eval_samples_per_second": 126.61, |
|
"eval_steps_per_second": 16.137, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 52.24, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0027, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 53.73, |
|
"eval_accuracy_score": 0.9615689381933439, |
|
"eval_f1": 0.7359870024370431, |
|
"eval_loss": 0.25643840432167053, |
|
"eval_precision": 0.7248, |
|
"eval_recall": 0.7475247524752475, |
|
"eval_runtime": 2.402, |
|
"eval_samples_per_second": 127.392, |
|
"eval_steps_per_second": 16.236, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 56.72, |
|
"eval_accuracy_score": 0.96513470681458, |
|
"eval_f1": 0.7636932707355242, |
|
"eval_loss": 0.24773868918418884, |
|
"eval_precision": 0.7261904761904762, |
|
"eval_recall": 0.8052805280528053, |
|
"eval_runtime": 2.406, |
|
"eval_samples_per_second": 127.183, |
|
"eval_steps_per_second": 16.21, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0019, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"eval_accuracy_score": 0.9615689381933439, |
|
"eval_f1": 0.7295796986518637, |
|
"eval_loss": 0.27105194330215454, |
|
"eval_precision": 0.7022900763358778, |
|
"eval_recall": 0.759075907590759, |
|
"eval_runtime": 2.4067, |
|
"eval_samples_per_second": 127.143, |
|
"eval_steps_per_second": 16.204, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 62.69, |
|
"eval_accuracy_score": 0.9620972002113048, |
|
"eval_f1": 0.7459807073954984, |
|
"eval_loss": 0.2745388150215149, |
|
"eval_precision": 0.7272727272727273, |
|
"eval_recall": 0.7656765676567657, |
|
"eval_runtime": 2.4067, |
|
"eval_samples_per_second": 127.143, |
|
"eval_steps_per_second": 16.205, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 65.67, |
|
"eval_accuracy_score": 0.9632857897517169, |
|
"eval_f1": 0.7472353870458136, |
|
"eval_loss": 0.26222512125968933, |
|
"eval_precision": 0.7166666666666667, |
|
"eval_recall": 0.7805280528052805, |
|
"eval_runtime": 2.4079, |
|
"eval_samples_per_second": 127.079, |
|
"eval_steps_per_second": 16.196, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 67.16, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.0019, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 68.66, |
|
"eval_accuracy_score": 0.9644743792921289, |
|
"eval_f1": 0.7557677008750995, |
|
"eval_loss": 0.25255098938941956, |
|
"eval_precision": 0.7296466973886329, |
|
"eval_recall": 0.7838283828382838, |
|
"eval_runtime": 2.4095, |
|
"eval_samples_per_second": 126.996, |
|
"eval_steps_per_second": 16.186, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 71.64, |
|
"eval_accuracy_score": 0.9657950343370312, |
|
"eval_f1": 0.765079365079365, |
|
"eval_loss": 0.2474120408296585, |
|
"eval_precision": 0.7370030581039755, |
|
"eval_recall": 0.7953795379537953, |
|
"eval_runtime": 2.4085, |
|
"eval_samples_per_second": 127.049, |
|
"eval_steps_per_second": 16.192, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 74.63, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0018, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 74.63, |
|
"eval_accuracy_score": 0.9653988378235605, |
|
"eval_f1": 0.7618296529968455, |
|
"eval_loss": 0.2491973489522934, |
|
"eval_precision": 0.729607250755287, |
|
"eval_recall": 0.7970297029702971, |
|
"eval_runtime": 2.4082, |
|
"eval_samples_per_second": 127.066, |
|
"eval_steps_per_second": 16.195, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 77.61, |
|
"eval_accuracy_score": 0.963946117274168, |
|
"eval_f1": 0.7564604541895067, |
|
"eval_loss": 0.2652634084224701, |
|
"eval_precision": 0.7198211624441133, |
|
"eval_recall": 0.7970297029702971, |
|
"eval_runtime": 2.4091, |
|
"eval_samples_per_second": 127.02, |
|
"eval_steps_per_second": 16.189, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 80.6, |
|
"eval_accuracy_score": 0.963946117274168, |
|
"eval_f1": 0.7577235772357722, |
|
"eval_loss": 0.2766047418117523, |
|
"eval_precision": 0.7467948717948718, |
|
"eval_recall": 0.768976897689769, |
|
"eval_runtime": 2.4127, |
|
"eval_samples_per_second": 126.83, |
|
"eval_steps_per_second": 16.165, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 82.09, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.0012, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 83.58, |
|
"eval_accuracy_score": 0.9627575277337559, |
|
"eval_f1": 0.7384855581576891, |
|
"eval_loss": 0.2791267931461334, |
|
"eval_precision": 0.7007407407407408, |
|
"eval_recall": 0.7805280528052805, |
|
"eval_runtime": 2.4092, |
|
"eval_samples_per_second": 127.015, |
|
"eval_steps_per_second": 16.188, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 86.57, |
|
"eval_accuracy_score": 0.9672477548864237, |
|
"eval_f1": 0.7776000000000001, |
|
"eval_loss": 0.24720582365989685, |
|
"eval_precision": 0.7546583850931677, |
|
"eval_recall": 0.801980198019802, |
|
"eval_runtime": 2.4153, |
|
"eval_samples_per_second": 126.694, |
|
"eval_steps_per_second": 16.147, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 89.55, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.0013, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 89.55, |
|
"eval_accuracy_score": 0.9664553618594823, |
|
"eval_f1": 0.7794707297514034, |
|
"eval_loss": 0.25364401936531067, |
|
"eval_precision": 0.7581903276131046, |
|
"eval_recall": 0.801980198019802, |
|
"eval_runtime": 2.4087, |
|
"eval_samples_per_second": 127.037, |
|
"eval_steps_per_second": 16.191, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 92.54, |
|
"eval_accuracy_score": 0.9676439513998943, |
|
"eval_f1": 0.7832393231265109, |
|
"eval_loss": 0.25377386808395386, |
|
"eval_precision": 0.7653543307086614, |
|
"eval_recall": 0.801980198019802, |
|
"eval_runtime": 2.4146, |
|
"eval_samples_per_second": 126.731, |
|
"eval_steps_per_second": 16.152, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 95.52, |
|
"eval_accuracy_score": 0.9689646064447967, |
|
"eval_f1": 0.7859424920127794, |
|
"eval_loss": 0.23607416450977325, |
|
"eval_precision": 0.7616099071207431, |
|
"eval_recall": 0.8118811881188119, |
|
"eval_runtime": 2.4167, |
|
"eval_samples_per_second": 126.62, |
|
"eval_steps_per_second": 16.138, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 97.01, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.0014, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 98.51, |
|
"eval_accuracy_score": 0.9696249339672478, |
|
"eval_f1": 0.784155214227971, |
|
"eval_loss": 0.22985981404781342, |
|
"eval_precision": 0.768621236133122, |
|
"eval_recall": 0.8003300330033003, |
|
"eval_runtime": 2.4152, |
|
"eval_samples_per_second": 126.699, |
|
"eval_steps_per_second": 16.148, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 101.49, |
|
"eval_accuracy_score": 0.969228737453777, |
|
"eval_f1": 0.7798537774167345, |
|
"eval_loss": 0.2279633730649948, |
|
"eval_precision": 0.768, |
|
"eval_recall": 0.7920792079207921, |
|
"eval_runtime": 2.4192, |
|
"eval_samples_per_second": 126.488, |
|
"eval_steps_per_second": 16.121, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 104.48, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.001, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 104.48, |
|
"eval_accuracy_score": 0.9705493924986793, |
|
"eval_f1": 0.7951020408163265, |
|
"eval_loss": 0.23545745015144348, |
|
"eval_precision": 0.7867528271405493, |
|
"eval_recall": 0.8036303630363036, |
|
"eval_runtime": 2.41, |
|
"eval_samples_per_second": 126.97, |
|
"eval_steps_per_second": 16.182, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 107.46, |
|
"eval_accuracy_score": 0.9688325409403064, |
|
"eval_f1": 0.7739340305711988, |
|
"eval_loss": 0.23859038949012756, |
|
"eval_precision": 0.7551020408163265, |
|
"eval_recall": 0.7937293729372937, |
|
"eval_runtime": 2.4059, |
|
"eval_samples_per_second": 127.189, |
|
"eval_steps_per_second": 16.21, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 110.45, |
|
"eval_accuracy_score": 0.9684363444268357, |
|
"eval_f1": 0.7909967845659165, |
|
"eval_loss": 0.2557569444179535, |
|
"eval_precision": 0.7711598746081505, |
|
"eval_recall": 0.8118811881188119, |
|
"eval_runtime": 2.4083, |
|
"eval_samples_per_second": 127.062, |
|
"eval_steps_per_second": 16.194, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 111.94, |
|
"learning_rate": 2.516778523489933e-05, |
|
"loss": 0.0008, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 113.43, |
|
"eval_accuracy_score": 0.9683042789223455, |
|
"eval_f1": 0.7780429594272077, |
|
"eval_loss": 0.2508944869041443, |
|
"eval_precision": 0.7511520737327189, |
|
"eval_recall": 0.806930693069307, |
|
"eval_runtime": 2.4124, |
|
"eval_samples_per_second": 126.846, |
|
"eval_steps_per_second": 16.167, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 116.42, |
|
"eval_accuracy_score": 0.9681722134178553, |
|
"eval_f1": 0.7712206952303962, |
|
"eval_loss": 0.2467135787010193, |
|
"eval_precision": 0.7559429477020603, |
|
"eval_recall": 0.7871287128712872, |
|
"eval_runtime": 2.4114, |
|
"eval_samples_per_second": 126.896, |
|
"eval_steps_per_second": 16.173, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 119.4, |
|
"learning_rate": 2.348993288590604e-05, |
|
"loss": 0.0011, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 119.4, |
|
"eval_accuracy_score": 0.966323296354992, |
|
"eval_f1": 0.7704654895666132, |
|
"eval_loss": 0.24946843087673187, |
|
"eval_precision": 0.75, |
|
"eval_recall": 0.7920792079207921, |
|
"eval_runtime": 2.4125, |
|
"eval_samples_per_second": 126.837, |
|
"eval_steps_per_second": 16.166, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 119.4, |
|
"step": 8000, |
|
"total_flos": 4.164857671044096e+16, |
|
"train_loss": 0.029139109283685686, |
|
"train_runtime": 3971.1585, |
|
"train_samples_per_second": 120.872, |
|
"train_steps_per_second": 3.777 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 224, |
|
"total_flos": 4.164857671044096e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|