|
{ |
|
"best_metric": 0.8868703550784476, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-ibo/checkpoint-3000", |
|
"epoch": 74.28571428571429, |
|
"global_step": 5200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.86, |
|
"eval_accuracy_score": 0.9765228426395939, |
|
"eval_f1": 0.8673300165837479, |
|
"eval_loss": 0.08876766264438629, |
|
"eval_precision": 0.863036303630363, |
|
"eval_recall": 0.8716666666666667, |
|
"eval_runtime": 4.4473, |
|
"eval_samples_per_second": 71.954, |
|
"eval_steps_per_second": 8.994, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_accuracy_score": 0.9741434010152284, |
|
"eval_f1": 0.8617886178861788, |
|
"eval_loss": 0.11480691283941269, |
|
"eval_precision": 0.8412698412698413, |
|
"eval_recall": 0.8833333333333333, |
|
"eval_runtime": 4.4462, |
|
"eval_samples_per_second": 71.972, |
|
"eval_steps_per_second": 8.997, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.1451, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_accuracy_score": 0.9781091370558376, |
|
"eval_f1": 0.874074074074074, |
|
"eval_loss": 0.10442519187927246, |
|
"eval_precision": 0.8634146341463415, |
|
"eval_recall": 0.885, |
|
"eval_runtime": 4.449, |
|
"eval_samples_per_second": 71.926, |
|
"eval_steps_per_second": 8.991, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_accuracy_score": 0.9766814720812182, |
|
"eval_f1": 0.8705688375927452, |
|
"eval_loss": 0.11804218590259552, |
|
"eval_precision": 0.8613376835236541, |
|
"eval_recall": 0.88, |
|
"eval_runtime": 4.4511, |
|
"eval_samples_per_second": 71.893, |
|
"eval_steps_per_second": 8.987, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0084, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_accuracy_score": 0.9746192893401016, |
|
"eval_f1": 0.8457047539616346, |
|
"eval_loss": 0.1311611831188202, |
|
"eval_precision": 0.8464106844741235, |
|
"eval_recall": 0.845, |
|
"eval_runtime": 4.4345, |
|
"eval_samples_per_second": 72.162, |
|
"eval_steps_per_second": 9.02, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"eval_accuracy_score": 0.9763642131979695, |
|
"eval_f1": 0.8714168714168715, |
|
"eval_loss": 0.134602889418602, |
|
"eval_precision": 0.856682769726248, |
|
"eval_recall": 0.8866666666666667, |
|
"eval_runtime": 4.4493, |
|
"eval_samples_per_second": 71.921, |
|
"eval_steps_per_second": 8.99, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy_score": 0.9803299492385786, |
|
"eval_f1": 0.8872180451127819, |
|
"eval_loss": 0.1307426393032074, |
|
"eval_precision": 0.8894472361809045, |
|
"eval_recall": 0.885, |
|
"eval_runtime": 4.442, |
|
"eval_samples_per_second": 72.039, |
|
"eval_steps_per_second": 9.005, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.005, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"eval_accuracy_score": 0.9771573604060914, |
|
"eval_f1": 0.8767576509511993, |
|
"eval_loss": 0.14564308524131775, |
|
"eval_precision": 0.8702791461412152, |
|
"eval_recall": 0.8833333333333333, |
|
"eval_runtime": 4.4473, |
|
"eval_samples_per_second": 71.954, |
|
"eval_steps_per_second": 8.994, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"eval_accuracy_score": 0.9771573604060914, |
|
"eval_f1": 0.8782104391052196, |
|
"eval_loss": 0.13427743315696716, |
|
"eval_precision": 0.8731466227347611, |
|
"eval_recall": 0.8833333333333333, |
|
"eval_runtime": 4.443, |
|
"eval_samples_per_second": 72.023, |
|
"eval_steps_per_second": 9.003, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0039, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"eval_accuracy_score": 0.9762055837563451, |
|
"eval_f1": 0.8687707641196013, |
|
"eval_loss": 0.15129442512989044, |
|
"eval_precision": 0.8658940397350994, |
|
"eval_recall": 0.8716666666666667, |
|
"eval_runtime": 4.4359, |
|
"eval_samples_per_second": 72.139, |
|
"eval_steps_per_second": 9.017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 31.43, |
|
"eval_accuracy_score": 0.9750951776649747, |
|
"eval_f1": 0.8700754400670577, |
|
"eval_loss": 0.16774575412273407, |
|
"eval_precision": 0.8752107925801011, |
|
"eval_recall": 0.865, |
|
"eval_runtime": 4.4409, |
|
"eval_samples_per_second": 72.057, |
|
"eval_steps_per_second": 9.007, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 34.29, |
|
"eval_accuracy_score": 0.9749365482233503, |
|
"eval_f1": 0.8601973684210527, |
|
"eval_loss": 0.1633668839931488, |
|
"eval_precision": 0.849025974025974, |
|
"eval_recall": 0.8716666666666667, |
|
"eval_runtime": 4.4452, |
|
"eval_samples_per_second": 71.988, |
|
"eval_steps_per_second": 8.998, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 37.14, |
|
"eval_accuracy_score": 0.9766814720812182, |
|
"eval_f1": 0.8782104391052196, |
|
"eval_loss": 0.16212213039398193, |
|
"eval_precision": 0.8731466227347611, |
|
"eval_recall": 0.8833333333333333, |
|
"eval_runtime": 4.4427, |
|
"eval_samples_per_second": 72.028, |
|
"eval_steps_per_second": 9.004, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy_score": 0.9763642131979695, |
|
"eval_f1": 0.8719665271966527, |
|
"eval_loss": 0.16754868626594543, |
|
"eval_precision": 0.8756302521008403, |
|
"eval_recall": 0.8683333333333333, |
|
"eval_runtime": 4.4401, |
|
"eval_samples_per_second": 72.07, |
|
"eval_steps_per_second": 9.009, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0023, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"eval_accuracy_score": 0.9790609137055838, |
|
"eval_f1": 0.8868703550784476, |
|
"eval_loss": 0.14278778433799744, |
|
"eval_precision": 0.8788870703764321, |
|
"eval_recall": 0.895, |
|
"eval_runtime": 4.4323, |
|
"eval_samples_per_second": 72.197, |
|
"eval_steps_per_second": 9.025, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 45.71, |
|
"eval_accuracy_score": 0.9782677664974619, |
|
"eval_f1": 0.8855721393034827, |
|
"eval_loss": 0.14296667277812958, |
|
"eval_precision": 0.8811881188118812, |
|
"eval_recall": 0.89, |
|
"eval_runtime": 4.4374, |
|
"eval_samples_per_second": 72.114, |
|
"eval_steps_per_second": 9.014, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 48.57, |
|
"eval_accuracy_score": 0.9781091370558376, |
|
"eval_f1": 0.8870703764320785, |
|
"eval_loss": 0.14033867418766022, |
|
"eval_precision": 0.8713826366559485, |
|
"eval_recall": 0.9033333333333333, |
|
"eval_runtime": 4.4359, |
|
"eval_samples_per_second": 72.138, |
|
"eval_steps_per_second": 9.017, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0026, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 51.43, |
|
"eval_accuracy_score": 0.9749365482233503, |
|
"eval_f1": 0.8693467336683417, |
|
"eval_loss": 0.18670859932899475, |
|
"eval_precision": 0.8737373737373737, |
|
"eval_recall": 0.865, |
|
"eval_runtime": 4.4362, |
|
"eval_samples_per_second": 72.133, |
|
"eval_steps_per_second": 9.017, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 54.29, |
|
"eval_accuracy_score": 0.9803299492385786, |
|
"eval_f1": 0.8937908496732027, |
|
"eval_loss": 0.1545487940311432, |
|
"eval_precision": 0.8766025641025641, |
|
"eval_recall": 0.9116666666666666, |
|
"eval_runtime": 4.4428, |
|
"eval_samples_per_second": 72.027, |
|
"eval_steps_per_second": 9.003, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0021, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"eval_accuracy_score": 0.975253807106599, |
|
"eval_f1": 0.8647302904564315, |
|
"eval_loss": 0.1578870564699173, |
|
"eval_precision": 0.8611570247933884, |
|
"eval_recall": 0.8683333333333333, |
|
"eval_runtime": 4.4357, |
|
"eval_samples_per_second": 72.143, |
|
"eval_steps_per_second": 9.018, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy_score": 0.9792195431472082, |
|
"eval_f1": 0.8872305140961857, |
|
"eval_loss": 0.14871039986610413, |
|
"eval_precision": 0.8828382838283828, |
|
"eval_recall": 0.8916666666666667, |
|
"eval_runtime": 4.4317, |
|
"eval_samples_per_second": 72.207, |
|
"eval_steps_per_second": 9.026, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"eval_accuracy_score": 0.9758883248730964, |
|
"eval_f1": 0.865546218487395, |
|
"eval_loss": 0.16632404923439026, |
|
"eval_precision": 0.8728813559322034, |
|
"eval_recall": 0.8583333333333333, |
|
"eval_runtime": 4.4414, |
|
"eval_samples_per_second": 72.05, |
|
"eval_steps_per_second": 9.006, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.0026, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 65.71, |
|
"eval_accuracy_score": 0.9747779187817259, |
|
"eval_f1": 0.865721434528774, |
|
"eval_loss": 0.15900883078575134, |
|
"eval_precision": 0.8664440734557596, |
|
"eval_recall": 0.865, |
|
"eval_runtime": 4.441, |
|
"eval_samples_per_second": 72.055, |
|
"eval_steps_per_second": 9.007, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 68.57, |
|
"eval_accuracy_score": 0.9789022842639594, |
|
"eval_f1": 0.8827470686767169, |
|
"eval_loss": 0.16089987754821777, |
|
"eval_precision": 0.8872053872053872, |
|
"eval_recall": 0.8783333333333333, |
|
"eval_runtime": 4.4442, |
|
"eval_samples_per_second": 72.004, |
|
"eval_steps_per_second": 9.001, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0017, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"eval_accuracy_score": 0.9750951776649747, |
|
"eval_f1": 0.8611339359079703, |
|
"eval_loss": 0.1948554664850235, |
|
"eval_precision": 0.8492706645056726, |
|
"eval_recall": 0.8733333333333333, |
|
"eval_runtime": 12.3435, |
|
"eval_samples_per_second": 25.925, |
|
"eval_steps_per_second": 3.241, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 74.29, |
|
"eval_accuracy_score": 0.9777918781725888, |
|
"eval_f1": 0.886158886158886, |
|
"eval_loss": 0.15537256002426147, |
|
"eval_precision": 0.8711755233494364, |
|
"eval_recall": 0.9016666666666666, |
|
"eval_runtime": 4.4385, |
|
"eval_samples_per_second": 72.096, |
|
"eval_steps_per_second": 9.012, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 74.29, |
|
"step": 5200, |
|
"total_flos": 2.169292304429568e+16, |
|
"train_loss": 0.01710937021443477, |
|
"train_runtime": 8154.5313, |
|
"train_samples_per_second": 58.863, |
|
"train_steps_per_second": 1.839 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 215, |
|
"total_flos": 2.169292304429568e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|