|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.168577671051025, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5663, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7293233082706767, |
|
"eval_f1": 0.6572164948453608, |
|
"eval_loss": 0.5216349363327026, |
|
"eval_precision": 0.6677272727272727, |
|
"eval_recall": 0.6509819967266777, |
|
"eval_runtime": 4.6427, |
|
"eval_samples_per_second": 85.941, |
|
"eval_steps_per_second": 10.769, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.071716785430908, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5149, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.681013431013431, |
|
"eval_loss": 0.5133972764015198, |
|
"eval_precision": 0.6758417508417509, |
|
"eval_recall": 0.6899436261138389, |
|
"eval_runtime": 5.1009, |
|
"eval_samples_per_second": 78.221, |
|
"eval_steps_per_second": 9.802, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.960922956466675, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4925, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7568922305764411, |
|
"eval_f1": 0.7014049083067808, |
|
"eval_loss": 0.48209038376808167, |
|
"eval_precision": 0.70548914753067, |
|
"eval_recall": 0.6979905437352246, |
|
"eval_runtime": 5.0731, |
|
"eval_samples_per_second": 78.65, |
|
"eval_steps_per_second": 9.856, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.4121310710906982, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4608, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.7114035087719299, |
|
"eval_loss": 0.46544694900512695, |
|
"eval_precision": 0.7149962462462462, |
|
"eval_recall": 0.7083106019276232, |
|
"eval_runtime": 5.1025, |
|
"eval_samples_per_second": 78.197, |
|
"eval_steps_per_second": 9.799, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.662163734436035, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4493, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7568922305764411, |
|
"eval_f1": 0.7193095695688436, |
|
"eval_loss": 0.46004626154899597, |
|
"eval_precision": 0.7126202191903377, |
|
"eval_recall": 0.7304964539007093, |
|
"eval_runtime": 5.0734, |
|
"eval_samples_per_second": 78.645, |
|
"eval_steps_per_second": 9.855, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.3096864223480225, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4257, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7869674185463659, |
|
"eval_f1": 0.7369400814426992, |
|
"eval_loss": 0.4306720495223999, |
|
"eval_precision": 0.7433180603283696, |
|
"eval_recall": 0.7317694126204765, |
|
"eval_runtime": 5.1042, |
|
"eval_samples_per_second": 78.17, |
|
"eval_steps_per_second": 9.796, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.605820894241333, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.4178, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7969924812030075, |
|
"eval_f1": 0.7580931280454488, |
|
"eval_loss": 0.4180676341056824, |
|
"eval_precision": 0.7551784340415459, |
|
"eval_recall": 0.7613657028550646, |
|
"eval_runtime": 5.0532, |
|
"eval_samples_per_second": 78.959, |
|
"eval_steps_per_second": 9.895, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.129829406738281, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3977, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8070175438596491, |
|
"eval_f1": 0.7616986620127981, |
|
"eval_loss": 0.3971773684024811, |
|
"eval_precision": 0.7687094310805651, |
|
"eval_recall": 0.7559556282960538, |
|
"eval_runtime": 5.047, |
|
"eval_samples_per_second": 79.057, |
|
"eval_steps_per_second": 9.907, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.125335693359375, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3946, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7715978711553932, |
|
"eval_loss": 0.3936789035797119, |
|
"eval_precision": 0.7779341980385954, |
|
"eval_recall": 0.7662756864884525, |
|
"eval_runtime": 5.0565, |
|
"eval_samples_per_second": 78.909, |
|
"eval_steps_per_second": 9.888, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 9.01931095123291, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3762, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7738095238095237, |
|
"eval_loss": 0.38743722438812256, |
|
"eval_precision": 0.7995467004005903, |
|
"eval_recall": 0.7583651573013275, |
|
"eval_runtime": 5.1497, |
|
"eval_samples_per_second": 77.481, |
|
"eval_steps_per_second": 9.709, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.317044734954834, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3727, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7915149151881459, |
|
"eval_loss": 0.3787190616130829, |
|
"eval_precision": 0.801371627277996, |
|
"eval_recall": 0.7836879432624113, |
|
"eval_runtime": 5.0708, |
|
"eval_samples_per_second": 78.687, |
|
"eval_steps_per_second": 9.86, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.624013900756836, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3626, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.7999105055663995, |
|
"eval_loss": 0.37497109174728394, |
|
"eval_precision": 0.8058980811575966, |
|
"eval_recall": 0.794735406437534, |
|
"eval_runtime": 5.0526, |
|
"eval_samples_per_second": 78.97, |
|
"eval_steps_per_second": 9.896, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.469719886779785, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.359, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7802721088435374, |
|
"eval_loss": 0.37280407547950745, |
|
"eval_precision": 0.8065570314147164, |
|
"eval_recall": 0.7644117112202218, |
|
"eval_runtime": 5.0466, |
|
"eval_samples_per_second": 79.063, |
|
"eval_steps_per_second": 9.908, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.554767608642578, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3488, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7816021636937343, |
|
"eval_loss": 0.370919793844223, |
|
"eval_precision": 0.8049342105263158, |
|
"eval_recall": 0.7669121658483361, |
|
"eval_runtime": 5.0529, |
|
"eval_samples_per_second": 78.965, |
|
"eval_steps_per_second": 9.895, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.5811327695846558, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.3445, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8050261780104713, |
|
"eval_loss": 0.3667002022266388, |
|
"eval_precision": 0.8131443298969072, |
|
"eval_recall": 0.7982815057283142, |
|
"eval_runtime": 5.0467, |
|
"eval_samples_per_second": 79.061, |
|
"eval_steps_per_second": 9.907, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 5.522835731506348, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3344, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8039617859231819, |
|
"eval_loss": 0.3655822277069092, |
|
"eval_precision": 0.8142185588254234, |
|
"eval_recall": 0.7957810511002, |
|
"eval_runtime": 5.05, |
|
"eval_samples_per_second": 79.01, |
|
"eval_steps_per_second": 9.901, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 4.577361583709717, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.3339, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.7991821327461466, |
|
"eval_loss": 0.36540165543556213, |
|
"eval_precision": 0.8127623983206507, |
|
"eval_recall": 0.7890070921985816, |
|
"eval_runtime": 5.0524, |
|
"eval_samples_per_second": 78.973, |
|
"eval_steps_per_second": 9.896, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.5822412967681885, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3357, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8028733423778791, |
|
"eval_loss": 0.36383578181266785, |
|
"eval_precision": 0.8154009126466754, |
|
"eval_recall": 0.7932805964720859, |
|
"eval_runtime": 5.0893, |
|
"eval_samples_per_second": 78.399, |
|
"eval_steps_per_second": 9.824, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.345070838928223, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.3357, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8028733423778791, |
|
"eval_loss": 0.36459243297576904, |
|
"eval_precision": 0.8154009126466754, |
|
"eval_recall": 0.7932805964720859, |
|
"eval_runtime": 5.0505, |
|
"eval_samples_per_second": 79.002, |
|
"eval_steps_per_second": 9.9, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 6.21327018737793, |
|
"learning_rate": 0.0, |
|
"loss": 0.3359, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8054576910978295, |
|
"eval_loss": 0.3638042211532593, |
|
"eval_precision": 0.8193059564418788, |
|
"eval_recall": 0.795053646117476, |
|
"eval_runtime": 5.0736, |
|
"eval_samples_per_second": 78.643, |
|
"eval_steps_per_second": 9.855, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7590599775312000.0, |
|
"train_loss": 0.39794730983796667, |
|
"train_runtime": 1936.5815, |
|
"train_samples_per_second": 37.571, |
|
"train_steps_per_second": 1.26 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7590599775312000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|