|
{ |
|
"best_metric": 0.9879474725670084, |
|
"best_model_checkpoint": "pasha/checkpoint-500", |
|
"epoch": 21.27659574468085, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.9570877531340405, |
|
"eval_f1": 0.9485879797248371, |
|
"eval_loss": 0.2664182484149933, |
|
"eval_precision": 0.9534206695778749, |
|
"eval_recall": 0.9438040345821326, |
|
"eval_runtime": 13.9899, |
|
"eval_samples_per_second": 13.51, |
|
"eval_steps_per_second": 0.858, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_accuracy": 0.983847637415622, |
|
"eval_f1": 0.9778975741239893, |
|
"eval_loss": 0.10435084253549576, |
|
"eval_precision": 0.9756185012549301, |
|
"eval_recall": 0.9801873198847262, |
|
"eval_runtime": 13.823, |
|
"eval_samples_per_second": 13.673, |
|
"eval_steps_per_second": 0.868, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_accuracy": 0.9903567984570878, |
|
"eval_f1": 0.987601078167116, |
|
"eval_loss": 0.06718672811985016, |
|
"eval_precision": 0.9852993904625313, |
|
"eval_recall": 0.9899135446685879, |
|
"eval_runtime": 13.9839, |
|
"eval_samples_per_second": 13.516, |
|
"eval_steps_per_second": 0.858, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"eval_accuracy": 0.9884281581485053, |
|
"eval_f1": 0.9841783531103919, |
|
"eval_loss": 0.06342343986034393, |
|
"eval_precision": 0.9824120603015075, |
|
"eval_recall": 0.9859510086455331, |
|
"eval_runtime": 13.8826, |
|
"eval_samples_per_second": 13.614, |
|
"eval_steps_per_second": 0.864, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2958, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"eval_accuracy": 0.9905978784956606, |
|
"eval_f1": 0.9879474725670084, |
|
"eval_loss": 0.058533914387226105, |
|
"eval_precision": 0.986704994610133, |
|
"eval_recall": 0.989193083573487, |
|
"eval_runtime": 13.739, |
|
"eval_samples_per_second": 13.756, |
|
"eval_steps_per_second": 0.873, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"eval_accuracy": 0.9927675988428158, |
|
"eval_f1": 0.9908322847384505, |
|
"eval_loss": 0.051136456429958344, |
|
"eval_precision": 0.9888769285970578, |
|
"eval_recall": 0.9927953890489913, |
|
"eval_runtime": 14.0393, |
|
"eval_samples_per_second": 13.462, |
|
"eval_steps_per_second": 0.855, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"eval_accuracy": 0.992526518804243, |
|
"eval_f1": 0.9895795903701042, |
|
"eval_loss": 0.05025022476911545, |
|
"eval_precision": 0.9870967741935484, |
|
"eval_recall": 0.9920749279538905, |
|
"eval_runtime": 14.0132, |
|
"eval_samples_per_second": 13.487, |
|
"eval_steps_per_second": 0.856, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_accuracy": 0.991321118611379, |
|
"eval_f1": 0.9881380301941048, |
|
"eval_loss": 0.05291323363780975, |
|
"eval_precision": 0.9860114777618364, |
|
"eval_recall": 0.9902737752161384, |
|
"eval_runtime": 13.9493, |
|
"eval_samples_per_second": 13.549, |
|
"eval_steps_per_second": 0.86, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"eval_accuracy": 0.9903567984570878, |
|
"eval_f1": 0.986704994610133, |
|
"eval_loss": 0.0581122450530529, |
|
"eval_precision": 0.9842293906810036, |
|
"eval_recall": 0.989193083573487, |
|
"eval_runtime": 13.9545, |
|
"eval_samples_per_second": 13.544, |
|
"eval_steps_per_second": 0.86, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 0.0, |
|
"loss": 0.0256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"eval_accuracy": 0.990115718418515, |
|
"eval_f1": 0.9868775840373899, |
|
"eval_loss": 0.057054802775382996, |
|
"eval_precision": 0.984930032292788, |
|
"eval_recall": 0.9888328530259366, |
|
"eval_runtime": 14.2093, |
|
"eval_samples_per_second": 13.301, |
|
"eval_steps_per_second": 0.845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"step": 1000, |
|
"total_flos": 4247054450688000.0, |
|
"train_loss": 0.16068801975250244, |
|
"train_runtime": 1757.2853, |
|
"train_samples_per_second": 9.105, |
|
"train_steps_per_second": 0.569 |
|
} |
|
], |
|
"max_steps": 1000, |
|
"num_train_epochs": 22, |
|
"total_flos": 4247054450688000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|