|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.7431385517120361, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.2665, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8448836627470603, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.7136898040771484, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 0.9087, |
|
"eval_samples_per_second": 205.786, |
|
"eval_steps_per_second": 3.301, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.2968225479125977, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.713, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8451338503877909, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.6074723601341248, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 0.9286, |
|
"eval_samples_per_second": 201.374, |
|
"eval_steps_per_second": 3.231, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.28258216381073, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.6346, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8493870402802102, |
|
"eval_f1": 0.022222222222222223, |
|
"eval_loss": 0.5231208801269531, |
|
"eval_precision": 0.19047619047619047, |
|
"eval_recall": 0.011799410029498525, |
|
"eval_runtime": 0.957, |
|
"eval_samples_per_second": 195.406, |
|
"eval_steps_per_second": 3.135, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.9985119104385376, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5555, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8656492369276958, |
|
"eval_f1": 0.10501193317422433, |
|
"eval_loss": 0.44577664136886597, |
|
"eval_precision": 0.275, |
|
"eval_recall": 0.06489675516224189, |
|
"eval_runtime": 0.9294, |
|
"eval_samples_per_second": 201.195, |
|
"eval_steps_per_second": 3.228, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3407347202301025, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4696, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8949211908931699, |
|
"eval_f1": 0.35859519408502777, |
|
"eval_loss": 0.37153393030166626, |
|
"eval_precision": 0.4801980198019802, |
|
"eval_recall": 0.2861356932153392, |
|
"eval_runtime": 0.9105, |
|
"eval_samples_per_second": 205.39, |
|
"eval_steps_per_second": 3.295, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.2348569631576538, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3932, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9194395796847635, |
|
"eval_f1": 0.5402124430955993, |
|
"eval_loss": 0.3133937120437622, |
|
"eval_precision": 0.55625, |
|
"eval_recall": 0.5250737463126843, |
|
"eval_runtime": 0.8947, |
|
"eval_samples_per_second": 209.001, |
|
"eval_steps_per_second": 3.353, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.8620750904083252, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3299, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9276957718288716, |
|
"eval_f1": 0.6284916201117318, |
|
"eval_loss": 0.2705799341201782, |
|
"eval_precision": 0.596816976127321, |
|
"eval_recall": 0.6637168141592921, |
|
"eval_runtime": 0.8955, |
|
"eval_samples_per_second": 208.832, |
|
"eval_steps_per_second": 3.35, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.5275648832321167, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2896, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9339504628471353, |
|
"eval_f1": 0.6711772665764547, |
|
"eval_loss": 0.24331486225128174, |
|
"eval_precision": 0.62, |
|
"eval_recall": 0.7315634218289085, |
|
"eval_runtime": 0.9165, |
|
"eval_samples_per_second": 204.036, |
|
"eval_steps_per_second": 3.273, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.4570423364639282, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2656, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9354515886915187, |
|
"eval_f1": 0.6923076923076923, |
|
"eval_loss": 0.22765140235424042, |
|
"eval_precision": 0.6289156626506024, |
|
"eval_recall": 0.7699115044247787, |
|
"eval_runtime": 0.9292, |
|
"eval_samples_per_second": 201.246, |
|
"eval_steps_per_second": 3.229, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.4647762775421143, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2442, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9387040280210157, |
|
"eval_f1": 0.7088948787061994, |
|
"eval_loss": 0.20824101567268372, |
|
"eval_precision": 0.652605459057072, |
|
"eval_recall": 0.775811209439528, |
|
"eval_runtime": 0.9229, |
|
"eval_samples_per_second": 202.614, |
|
"eval_steps_per_second": 3.25, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.644608974456787, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.23, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9382036527395546, |
|
"eval_f1": 0.7078947368421052, |
|
"eval_loss": 0.2019660323858261, |
|
"eval_precision": 0.6389548693586699, |
|
"eval_recall": 0.7935103244837758, |
|
"eval_runtime": 0.908, |
|
"eval_samples_per_second": 205.937, |
|
"eval_steps_per_second": 3.304, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.0320627689361572, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2229, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9384538403802852, |
|
"eval_f1": 0.7220026350461134, |
|
"eval_loss": 0.19768132269382477, |
|
"eval_precision": 0.6523809523809524, |
|
"eval_recall": 0.8082595870206489, |
|
"eval_runtime": 0.9098, |
|
"eval_samples_per_second": 205.544, |
|
"eval_steps_per_second": 3.297, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.1117591857910156, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2132, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.940205153865399, |
|
"eval_f1": 0.7267904509283818, |
|
"eval_loss": 0.18858253955841064, |
|
"eval_precision": 0.6602409638554216, |
|
"eval_recall": 0.8082595870206489, |
|
"eval_runtime": 0.9058, |
|
"eval_samples_per_second": 206.445, |
|
"eval_steps_per_second": 3.312, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.4440829753875732, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2055, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9414560920690518, |
|
"eval_f1": 0.7294751009421264, |
|
"eval_loss": 0.18096885085105896, |
|
"eval_precision": 0.6707920792079208, |
|
"eval_recall": 0.799410029498525, |
|
"eval_runtime": 0.9231, |
|
"eval_samples_per_second": 202.575, |
|
"eval_steps_per_second": 3.25, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.746239423751831, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2038, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9404553415061296, |
|
"eval_f1": 0.7275132275132277, |
|
"eval_loss": 0.18217705190181732, |
|
"eval_precision": 0.6594724220623501, |
|
"eval_recall": 0.8112094395280236, |
|
"eval_runtime": 0.9121, |
|
"eval_samples_per_second": 205.015, |
|
"eval_steps_per_second": 3.289, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.0166144371032715, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2004, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9429572179134351, |
|
"eval_f1": 0.7393617021276596, |
|
"eval_loss": 0.17875301837921143, |
|
"eval_precision": 0.6731234866828087, |
|
"eval_recall": 0.8200589970501475, |
|
"eval_runtime": 0.9135, |
|
"eval_samples_per_second": 204.697, |
|
"eval_steps_per_second": 3.284, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.0570679903030396, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1966, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9432074055541656, |
|
"eval_f1": 0.7417218543046358, |
|
"eval_loss": 0.1774715930223465, |
|
"eval_precision": 0.6730769230769231, |
|
"eval_recall": 0.8259587020648967, |
|
"eval_runtime": 0.9125, |
|
"eval_samples_per_second": 204.93, |
|
"eval_steps_per_second": 3.288, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.0107321739196777, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1931, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9434575931948962, |
|
"eval_f1": 0.7387862796833773, |
|
"eval_loss": 0.17654407024383545, |
|
"eval_precision": 0.6682577565632458, |
|
"eval_recall": 0.8259587020648967, |
|
"eval_runtime": 0.9156, |
|
"eval_samples_per_second": 204.236, |
|
"eval_steps_per_second": 3.277, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 1.0605403184890747, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1937, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9437077808356267, |
|
"eval_f1": 0.7427055702917772, |
|
"eval_loss": 0.17490608990192413, |
|
"eval_precision": 0.6746987951807228, |
|
"eval_recall": 0.8259587020648967, |
|
"eval_runtime": 0.9207, |
|
"eval_samples_per_second": 203.097, |
|
"eval_steps_per_second": 3.258, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.278860092163086, |
|
"learning_rate": 0.0, |
|
"loss": 0.1888, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9444583437578183, |
|
"eval_f1": 0.7483355525965381, |
|
"eval_loss": 0.17425791919231415, |
|
"eval_precision": 0.6820388349514563, |
|
"eval_recall": 0.8289085545722714, |
|
"eval_runtime": 0.9189, |
|
"eval_samples_per_second": 203.506, |
|
"eval_steps_per_second": 3.265, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 901149122771520.0, |
|
"train_loss": 0.36049160057643675, |
|
"train_runtime": 244.1252, |
|
"train_samples_per_second": 138.208, |
|
"train_steps_per_second": 8.684 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 901149122771520.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|