|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.3284077644348145, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5535, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7268170426065163, |
|
"eval_f1": 0.6326680574676724, |
|
"eval_loss": 0.5077849626541138, |
|
"eval_precision": 0.6605799373040753, |
|
"eval_recall": 0.6242044008001455, |
|
"eval_runtime": 5.0717, |
|
"eval_samples_per_second": 78.673, |
|
"eval_steps_per_second": 9.859, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.732577085494995, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4682, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8170426065162907, |
|
"eval_f1": 0.777617444284111, |
|
"eval_loss": 0.4184626042842865, |
|
"eval_precision": 0.7798245614035089, |
|
"eval_recall": 0.7755501000181851, |
|
"eval_runtime": 5.0889, |
|
"eval_samples_per_second": 78.406, |
|
"eval_steps_per_second": 9.825, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.4716732501983643, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3849, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8170426065162907, |
|
"eval_f1": 0.757268931723293, |
|
"eval_loss": 0.38087406754493713, |
|
"eval_precision": 0.7968253968253969, |
|
"eval_recall": 0.7380432805964721, |
|
"eval_runtime": 5.1872, |
|
"eval_samples_per_second": 76.92, |
|
"eval_steps_per_second": 9.639, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.9364979267120361, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3127, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8289446964056049, |
|
"eval_loss": 0.32795679569244385, |
|
"eval_precision": 0.8266129032258065, |
|
"eval_recall": 0.8314238952536825, |
|
"eval_runtime": 5.0897, |
|
"eval_samples_per_second": 78.394, |
|
"eval_steps_per_second": 9.824, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.656118392944336, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2869, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8341332527115377, |
|
"eval_loss": 0.3168599307537079, |
|
"eval_precision": 0.8333132275770553, |
|
"eval_recall": 0.8349699945444626, |
|
"eval_runtime": 5.064, |
|
"eval_samples_per_second": 78.791, |
|
"eval_steps_per_second": 9.874, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.3921332359313965, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.274, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8576006759069754, |
|
"eval_loss": 0.3217551112174988, |
|
"eval_precision": 0.8466769923965081, |
|
"eval_recall": 0.8731132933260592, |
|
"eval_runtime": 5.0567, |
|
"eval_samples_per_second": 78.906, |
|
"eval_steps_per_second": 9.888, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.9414736032485962, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2539, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8417152566223307, |
|
"eval_loss": 0.30381932854652405, |
|
"eval_precision": 0.8378262413446174, |
|
"eval_recall": 0.8460174577195854, |
|
"eval_runtime": 5.0791, |
|
"eval_samples_per_second": 78.557, |
|
"eval_steps_per_second": 9.844, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.456129789352417, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2286, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8341632880321839, |
|
"eval_loss": 0.32023322582244873, |
|
"eval_precision": 0.8479139504563233, |
|
"eval_recall": 0.8235133660665576, |
|
"eval_runtime": 5.2736, |
|
"eval_samples_per_second": 75.66, |
|
"eval_steps_per_second": 9.481, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.962612152099609, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2249, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8662440310793597, |
|
"eval_loss": 0.2973268926143646, |
|
"eval_precision": 0.8606158357771261, |
|
"eval_recall": 0.872704128023277, |
|
"eval_runtime": 5.05, |
|
"eval_samples_per_second": 79.009, |
|
"eval_steps_per_second": 9.901, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.1556286811828613, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2083, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8376560692488731, |
|
"eval_loss": 0.3127811551094055, |
|
"eval_precision": 0.8602278120550546, |
|
"eval_recall": 0.8220585561011093, |
|
"eval_runtime": 5.0596, |
|
"eval_samples_per_second": 78.86, |
|
"eval_steps_per_second": 9.882, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.8982645273208618, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1935, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8721887408091659, |
|
"eval_loss": 0.2957092523574829, |
|
"eval_precision": 0.866466275659824, |
|
"eval_recall": 0.8787506819421713, |
|
"eval_runtime": 5.0454, |
|
"eval_samples_per_second": 79.082, |
|
"eval_steps_per_second": 9.91, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.364419460296631, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1859, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8602993213495533, |
|
"eval_loss": 0.2869341969490051, |
|
"eval_precision": 0.8547653958944281, |
|
"eval_recall": 0.8666575741043827, |
|
"eval_runtime": 5.0666, |
|
"eval_samples_per_second": 78.751, |
|
"eval_steps_per_second": 9.869, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.9051835536956787, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1735, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8502252252252251, |
|
"eval_loss": 0.30611610412597656, |
|
"eval_precision": 0.863265306122449, |
|
"eval_recall": 0.8398799781778505, |
|
"eval_runtime": 5.1317, |
|
"eval_samples_per_second": 77.752, |
|
"eval_steps_per_second": 9.743, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.753292083740234, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1804, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8695225637671682, |
|
"eval_loss": 0.29550090432167053, |
|
"eval_precision": 0.8631532846715328, |
|
"eval_recall": 0.8769776322967813, |
|
"eval_runtime": 5.0486, |
|
"eval_samples_per_second": 79.032, |
|
"eval_steps_per_second": 9.904, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.1921367347240448, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1628, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8757339815412664, |
|
"eval_loss": 0.2972831130027771, |
|
"eval_precision": 0.8766906299500427, |
|
"eval_recall": 0.8747954173486088, |
|
"eval_runtime": 5.0606, |
|
"eval_samples_per_second": 78.844, |
|
"eval_steps_per_second": 9.88, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.38073158264160156, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1619, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8707140332272888, |
|
"eval_loss": 0.3023494482040405, |
|
"eval_precision": 0.8618432385874246, |
|
"eval_recall": 0.8819785415530097, |
|
"eval_runtime": 5.0599, |
|
"eval_samples_per_second": 78.856, |
|
"eval_steps_per_second": 9.882, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.6226332187652588, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1514, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8775533117267087, |
|
"eval_loss": 0.2997310757637024, |
|
"eval_precision": 0.873246730188791, |
|
"eval_recall": 0.8822967812329514, |
|
"eval_runtime": 5.0625, |
|
"eval_samples_per_second": 78.815, |
|
"eval_steps_per_second": 9.877, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.088443756103516, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1503, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8736504011098378, |
|
"eval_loss": 0.3002457618713379, |
|
"eval_precision": 0.8718487394957983, |
|
"eval_recall": 0.8755228223313329, |
|
"eval_runtime": 5.0624, |
|
"eval_samples_per_second": 78.817, |
|
"eval_steps_per_second": 9.877, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 6.321498394012451, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.154, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8730223677032187, |
|
"eval_loss": 0.303114652633667, |
|
"eval_precision": 0.8730223677032187, |
|
"eval_recall": 0.8730223677032187, |
|
"eval_runtime": 5.2549, |
|
"eval_samples_per_second": 75.929, |
|
"eval_steps_per_second": 9.515, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.89798903465271, |
|
"learning_rate": 0.0, |
|
"loss": 0.1408, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8763538792940554, |
|
"eval_loss": 0.30106595158576965, |
|
"eval_precision": 0.8754297605404427, |
|
"eval_recall": 0.877295871976723, |
|
"eval_runtime": 5.0597, |
|
"eval_samples_per_second": 78.858, |
|
"eval_steps_per_second": 9.882, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.24252970335913487, |
|
"train_runtime": 1919.2772, |
|
"train_samples_per_second": 37.91, |
|
"train_steps_per_second": 1.271 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|