|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.5641188621521, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5535, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6401043033324587, |
|
"eval_loss": 0.5041041374206543, |
|
"eval_precision": 0.6583725987676694, |
|
"eval_recall": 0.6324331696672122, |
|
"eval_runtime": 1.8214, |
|
"eval_samples_per_second": 219.06, |
|
"eval_steps_per_second": 27.451, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.286947011947632, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4636, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7669172932330827, |
|
"eval_f1": 0.7331142070096449, |
|
"eval_loss": 0.4692240059375763, |
|
"eval_precision": 0.7252895752895754, |
|
"eval_recall": 0.7475904709947263, |
|
"eval_runtime": 1.8328, |
|
"eval_samples_per_second": 217.703, |
|
"eval_steps_per_second": 27.281, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.800495147705078, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4023, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.7809488416091623, |
|
"eval_loss": 0.36048197746276855, |
|
"eval_precision": 0.832562695924765, |
|
"eval_recall": 0.7572285870158211, |
|
"eval_runtime": 1.8261, |
|
"eval_samples_per_second": 218.495, |
|
"eval_steps_per_second": 27.38, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.0396647453308105, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3202, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8159125620465827, |
|
"eval_loss": 0.3256481885910034, |
|
"eval_precision": 0.8356565656565657, |
|
"eval_recall": 0.8021458446990362, |
|
"eval_runtime": 1.8317, |
|
"eval_samples_per_second": 217.825, |
|
"eval_steps_per_second": 27.296, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.8746237754821777, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2919, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8475258334958082, |
|
"eval_loss": 0.3067488968372345, |
|
"eval_precision": 0.8591828192414193, |
|
"eval_recall": 0.8381069285324605, |
|
"eval_runtime": 1.8315, |
|
"eval_samples_per_second": 217.857, |
|
"eval_steps_per_second": 27.3, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.942033290863037, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2657, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8320383569853806, |
|
"eval_loss": 0.3400041460990906, |
|
"eval_precision": 0.8193218954248366, |
|
"eval_recall": 0.8553827968721586, |
|
"eval_runtime": 1.8306, |
|
"eval_samples_per_second": 217.958, |
|
"eval_steps_per_second": 27.313, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.39800548553466797, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2559, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8524146298159436, |
|
"eval_loss": 0.2993007302284241, |
|
"eval_precision": 0.8451250578971746, |
|
"eval_recall": 0.8613384251682124, |
|
"eval_runtime": 1.8316, |
|
"eval_samples_per_second": 217.847, |
|
"eval_steps_per_second": 27.299, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 7.434815406799316, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2369, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8584001703456596, |
|
"eval_loss": 0.30177775025367737, |
|
"eval_precision": 0.8759655377302435, |
|
"eval_recall": 0.8451991271140207, |
|
"eval_runtime": 1.8332, |
|
"eval_samples_per_second": 217.658, |
|
"eval_steps_per_second": 27.275, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.123136043548584, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2178, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8595070422535211, |
|
"eval_loss": 0.29259544610977173, |
|
"eval_precision": 0.8633733523114054, |
|
"eval_recall": 0.8559283506092017, |
|
"eval_runtime": 1.8408, |
|
"eval_samples_per_second": 216.759, |
|
"eval_steps_per_second": 27.163, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.8655648231506348, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2118, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.29553094506263733, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 1.8333, |
|
"eval_samples_per_second": 217.636, |
|
"eval_steps_per_second": 27.273, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.4139134883880615, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2034, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8572517421602788, |
|
"eval_loss": 0.2934134602546692, |
|
"eval_precision": 0.8679426449878376, |
|
"eval_recall": 0.8484269867248591, |
|
"eval_runtime": 1.8401, |
|
"eval_samples_per_second": 216.837, |
|
"eval_steps_per_second": 27.173, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.207653045654297, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1856, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8533986527862829, |
|
"eval_loss": 0.297758549451828, |
|
"eval_precision": 0.8572003218020917, |
|
"eval_recall": 0.8498817966903074, |
|
"eval_runtime": 1.8319, |
|
"eval_samples_per_second": 217.802, |
|
"eval_steps_per_second": 27.293, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.1223020553588867, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1775, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8493865995596099, |
|
"eval_loss": 0.3038978576660156, |
|
"eval_precision": 0.8651108632904749, |
|
"eval_recall": 0.8373795235497363, |
|
"eval_runtime": 1.8374, |
|
"eval_samples_per_second": 217.149, |
|
"eval_steps_per_second": 27.212, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.688318252563477, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1719, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.3036334812641144, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 1.8354, |
|
"eval_samples_per_second": 217.392, |
|
"eval_steps_per_second": 27.242, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.8339260816574097, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1621, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8596342841745197, |
|
"eval_loss": 0.299029141664505, |
|
"eval_precision": 0.8555364857667042, |
|
"eval_recall": 0.8641571194762684, |
|
"eval_runtime": 1.8346, |
|
"eval_samples_per_second": 217.487, |
|
"eval_steps_per_second": 27.254, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.4192665815353394, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1535, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8616171059774413, |
|
"eval_loss": 0.3039585053920746, |
|
"eval_precision": 0.859873949579832, |
|
"eval_recall": 0.8634297144935443, |
|
"eval_runtime": 1.8339, |
|
"eval_samples_per_second": 217.57, |
|
"eval_steps_per_second": 27.264, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.2236837148666382, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1504, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8510452961672474, |
|
"eval_loss": 0.31895172595977783, |
|
"eval_precision": 0.8615574190244527, |
|
"eval_recall": 0.8423804328059648, |
|
"eval_runtime": 1.8322, |
|
"eval_samples_per_second": 217.776, |
|
"eval_steps_per_second": 27.29, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.8861066102981567, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1459, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8522278069611882, |
|
"eval_loss": 0.31010520458221436, |
|
"eval_precision": 0.8513631702756499, |
|
"eval_recall": 0.8531096563011457, |
|
"eval_runtime": 1.8303, |
|
"eval_samples_per_second": 217.993, |
|
"eval_steps_per_second": 27.317, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.291304349899292, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1444, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.855319904024935, |
|
"eval_loss": 0.3119203448295593, |
|
"eval_precision": 0.862378106322743, |
|
"eval_recall": 0.8491543917075832, |
|
"eval_runtime": 1.8361, |
|
"eval_samples_per_second": 217.305, |
|
"eval_steps_per_second": 27.231, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.763808250427246, |
|
"learning_rate": 0.0, |
|
"loss": 0.1384, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.3090469241142273, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 1.8381, |
|
"eval_samples_per_second": 217.076, |
|
"eval_steps_per_second": 27.203, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.242632052937492, |
|
"train_runtime": 627.3825, |
|
"train_samples_per_second": 115.974, |
|
"train_steps_per_second": 3.889 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|