{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.3284077644348145, "learning_rate": 4.75e-05, "loss": 0.5535, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7268170426065163, "eval_f1": 0.6326680574676724, "eval_loss": 0.5077849626541138, "eval_precision": 0.6605799373040753, "eval_recall": 0.6242044008001455, "eval_runtime": 5.0717, "eval_samples_per_second": 78.673, "eval_steps_per_second": 9.859, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.732577085494995, "learning_rate": 4.5e-05, "loss": 0.4682, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.777617444284111, "eval_loss": 0.4184626042842865, "eval_precision": 0.7798245614035089, "eval_recall": 0.7755501000181851, "eval_runtime": 5.0889, "eval_samples_per_second": 78.406, "eval_steps_per_second": 9.825, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.4716732501983643, "learning_rate": 4.25e-05, "loss": 0.3849, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.757268931723293, "eval_loss": 0.38087406754493713, "eval_precision": 0.7968253968253969, "eval_recall": 0.7380432805964721, "eval_runtime": 5.1872, "eval_samples_per_second": 76.92, "eval_steps_per_second": 9.639, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.9364979267120361, "learning_rate": 4e-05, "loss": 0.3127, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8289446964056049, "eval_loss": 0.32795679569244385, "eval_precision": 0.8266129032258065, "eval_recall": 0.8314238952536825, "eval_runtime": 5.0897, "eval_samples_per_second": 78.394, "eval_steps_per_second": 9.824, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.656118392944336, "learning_rate": 3.7500000000000003e-05, "loss": 0.2869, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8341332527115377, "eval_loss": 0.3168599307537079, "eval_precision": 0.8333132275770553, "eval_recall": 0.8349699945444626, "eval_runtime": 5.064, "eval_samples_per_second": 78.791, "eval_steps_per_second": 9.874, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.3921332359313965, "learning_rate": 3.5e-05, "loss": 0.274, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8576006759069754, "eval_loss": 0.3217551112174988, "eval_precision": 0.8466769923965081, "eval_recall": 0.8731132933260592, "eval_runtime": 5.0567, "eval_samples_per_second": 78.906, "eval_steps_per_second": 9.888, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.9414736032485962, "learning_rate": 3.2500000000000004e-05, "loss": 0.2539, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8417152566223307, "eval_loss": 0.30381932854652405, "eval_precision": 0.8378262413446174, "eval_recall": 0.8460174577195854, "eval_runtime": 5.0791, "eval_samples_per_second": 78.557, "eval_steps_per_second": 9.844, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.456129789352417, "learning_rate": 3e-05, "loss": 0.2286, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8341632880321839, "eval_loss": 0.32023322582244873, "eval_precision": 0.8479139504563233, "eval_recall": 0.8235133660665576, "eval_runtime": 5.2736, "eval_samples_per_second": 75.66, "eval_steps_per_second": 9.481, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.962612152099609, "learning_rate": 2.7500000000000004e-05, "loss": 0.2249, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8662440310793597, "eval_loss": 0.2973268926143646, "eval_precision": 0.8606158357771261, "eval_recall": 0.872704128023277, "eval_runtime": 5.05, "eval_samples_per_second": 79.009, "eval_steps_per_second": 9.901, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.1556286811828613, "learning_rate": 2.5e-05, "loss": 0.2083, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8376560692488731, "eval_loss": 0.3127811551094055, "eval_precision": 0.8602278120550546, "eval_recall": 0.8220585561011093, "eval_runtime": 5.0596, "eval_samples_per_second": 78.86, "eval_steps_per_second": 9.882, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.8982645273208618, "learning_rate": 2.25e-05, "loss": 0.1935, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8721887408091659, "eval_loss": 0.2957092523574829, "eval_precision": 0.866466275659824, "eval_recall": 0.8787506819421713, "eval_runtime": 5.0454, "eval_samples_per_second": 79.082, "eval_steps_per_second": 9.91, "step": 1342 }, { "epoch": 12.0, "grad_norm": 6.364419460296631, "learning_rate": 2e-05, "loss": 0.1859, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8602993213495533, "eval_loss": 0.2869341969490051, "eval_precision": 0.8547653958944281, "eval_recall": 0.8666575741043827, "eval_runtime": 5.0666, "eval_samples_per_second": 78.751, "eval_steps_per_second": 9.869, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.9051835536956787, "learning_rate": 1.75e-05, "loss": 0.1735, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8502252252252251, "eval_loss": 0.30611610412597656, "eval_precision": 0.863265306122449, "eval_recall": 0.8398799781778505, "eval_runtime": 5.1317, "eval_samples_per_second": 77.752, "eval_steps_per_second": 9.743, "step": 1586 }, { "epoch": 14.0, "grad_norm": 6.753292083740234, "learning_rate": 1.5e-05, "loss": 0.1804, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8695225637671682, "eval_loss": 0.29550090432167053, "eval_precision": 0.8631532846715328, "eval_recall": 0.8769776322967813, "eval_runtime": 5.0486, "eval_samples_per_second": 79.032, "eval_steps_per_second": 9.904, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.1921367347240448, "learning_rate": 1.25e-05, "loss": 0.1628, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8757339815412664, "eval_loss": 0.2972831130027771, "eval_precision": 0.8766906299500427, "eval_recall": 0.8747954173486088, "eval_runtime": 5.0606, "eval_samples_per_second": 78.844, "eval_steps_per_second": 9.88, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.38073158264160156, "learning_rate": 1e-05, "loss": 0.1619, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8707140332272888, "eval_loss": 0.3023494482040405, "eval_precision": 0.8618432385874246, "eval_recall": 0.8819785415530097, "eval_runtime": 5.0599, "eval_samples_per_second": 78.856, "eval_steps_per_second": 9.882, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.6226332187652588, "learning_rate": 7.5e-06, "loss": 0.1514, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8775533117267087, "eval_loss": 0.2997310757637024, "eval_precision": 0.873246730188791, "eval_recall": 0.8822967812329514, "eval_runtime": 5.0625, "eval_samples_per_second": 78.815, "eval_steps_per_second": 9.877, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.088443756103516, "learning_rate": 5e-06, "loss": 0.1503, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8736504011098378, "eval_loss": 0.3002457618713379, "eval_precision": 0.8718487394957983, "eval_recall": 0.8755228223313329, "eval_runtime": 5.0624, "eval_samples_per_second": 78.817, "eval_steps_per_second": 9.877, "step": 2196 }, { "epoch": 19.0, "grad_norm": 6.321498394012451, "learning_rate": 2.5e-06, "loss": 0.154, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8730223677032187, "eval_loss": 0.303114652633667, "eval_precision": 0.8730223677032187, "eval_recall": 0.8730223677032187, "eval_runtime": 5.2549, "eval_samples_per_second": 75.929, "eval_steps_per_second": 9.515, "step": 2318 }, { "epoch": 20.0, "grad_norm": 1.89798903465271, "learning_rate": 0.0, "loss": 0.1408, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8763538792940554, "eval_loss": 0.30106595158576965, "eval_precision": 0.8754297605404427, "eval_recall": 0.877295871976723, "eval_runtime": 5.0597, "eval_samples_per_second": 78.858, "eval_steps_per_second": 9.882, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.24252970335913487, "train_runtime": 1919.2772, "train_samples_per_second": 37.91, "train_steps_per_second": 1.271 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }