{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.7894608974456787, "learning_rate": 4.75e-05, "loss": 0.5472, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7343358395989975, "eval_f1": 0.6338815789473684, "eval_loss": 0.4992983341217041, "eval_precision": 0.6726405580300865, "eval_recall": 0.6245226404800873, "eval_runtime": 5.1542, "eval_samples_per_second": 77.413, "eval_steps_per_second": 9.701, "step": 122 }, { "epoch": 2.0, "grad_norm": 5.12828254699707, "learning_rate": 4.5e-05, "loss": 0.4484, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.7744208494208494, "eval_loss": 0.4156816005706787, "eval_precision": 0.765545388374753, "eval_recall": 0.8096017457719585, "eval_runtime": 5.1518, "eval_samples_per_second": 77.449, "eval_steps_per_second": 9.705, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.4368088245391846, "learning_rate": 4.25e-05, "loss": 0.3338, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8179269882659713, "eval_loss": 0.32789668440818787, "eval_precision": 0.8510239760239761, "eval_recall": 0.7981905801054737, "eval_runtime": 5.107, "eval_samples_per_second": 78.127, "eval_steps_per_second": 9.79, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.854862689971924, "learning_rate": 4e-05, "loss": 0.2902, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8359744037230948, "eval_loss": 0.30365845561027527, "eval_precision": 0.8448835433371515, "eval_recall": 0.828514275322786, "eval_runtime": 5.1136, "eval_samples_per_second": 78.028, "eval_steps_per_second": 9.778, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.4754557609558105, "learning_rate": 3.7500000000000003e-05, "loss": 0.2756, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8480717680029244, "eval_loss": 0.292182594537735, "eval_precision": 0.8498775260257195, "eval_recall": 0.8463356973995272, "eval_runtime": 5.1797, "eval_samples_per_second": 77.031, "eval_steps_per_second": 9.653, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.3565938472747803, "learning_rate": 3.5e-05, "loss": 0.2514, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8446181767415888, "eval_loss": 0.3059082329273224, "eval_precision": 0.835902201887332, "eval_recall": 0.8560192762320422, "eval_runtime": 5.076, "eval_samples_per_second": 78.606, "eval_steps_per_second": 9.85, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.7644256949424744, "learning_rate": 3.2500000000000004e-05, "loss": 0.2338, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8347043853938569, "eval_loss": 0.2969984710216522, "eval_precision": 0.8277993283927745, "eval_recall": 0.8431987634115294, "eval_runtime": 5.1872, "eval_samples_per_second": 76.92, "eval_steps_per_second": 9.639, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.6158833503723145, "learning_rate": 3e-05, "loss": 0.2205, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8531398028421457, "eval_loss": 0.29671400785446167, "eval_precision": 0.8783539291322455, "eval_recall": 0.835924713584288, "eval_runtime": 5.0776, "eval_samples_per_second": 78.581, "eval_steps_per_second": 9.847, "step": 976 }, { "epoch": 9.0, "grad_norm": 7.319582462310791, "learning_rate": 2.7500000000000004e-05, "loss": 0.2153, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8401647707947546, "eval_loss": 0.29821664094924927, "eval_precision": 0.8393298751432535, "eval_recall": 0.8410165484633569, "eval_runtime": 5.1535, "eval_samples_per_second": 77.423, "eval_steps_per_second": 9.702, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.4924864768981934, "learning_rate": 2.5e-05, "loss": 0.1969, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8377065410088949, "eval_loss": 0.2942851483821869, "eval_precision": 0.8423344947735192, "eval_recall": 0.8335151845790143, "eval_runtime": 5.1062, "eval_samples_per_second": 78.14, "eval_steps_per_second": 9.792, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.3620827198028564, "learning_rate": 2.25e-05, "loss": 0.185, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8375505157126486, "eval_loss": 0.29727619886398315, "eval_precision": 0.8359243697478991, "eval_recall": 0.8392434988179669, "eval_runtime": 5.1573, "eval_samples_per_second": 77.366, "eval_steps_per_second": 9.695, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.206456661224365, "learning_rate": 2e-05, "loss": 0.1733, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8377065410088949, "eval_loss": 0.3074239492416382, "eval_precision": 0.8423344947735192, "eval_recall": 0.8335151845790143, "eval_runtime": 5.1717, "eval_samples_per_second": 77.151, "eval_steps_per_second": 9.668, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.582509994506836, "learning_rate": 1.75e-05, "loss": 0.1616, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8403508771929824, "eval_loss": 0.31861984729766846, "eval_precision": 0.8460491741741742, "eval_recall": 0.8352882342244045, "eval_runtime": 5.2202, "eval_samples_per_second": 76.433, "eval_steps_per_second": 9.578, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.573276519775391, "learning_rate": 1.5e-05, "loss": 0.16, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8347043853938569, "eval_loss": 0.32218077778816223, "eval_precision": 0.8277993283927745, "eval_recall": 0.8431987634115294, "eval_runtime": 5.1221, "eval_samples_per_second": 77.898, "eval_steps_per_second": 9.762, "step": 1708 }, { "epoch": 15.0, "grad_norm": 4.897996425628662, "learning_rate": 1.25e-05, "loss": 0.1494, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8464912280701754, "eval_loss": 0.3260069787502289, "eval_precision": 0.8522897897897899, "eval_recall": 0.8413347881432988, "eval_runtime": 5.1376, "eval_samples_per_second": 77.663, "eval_steps_per_second": 9.732, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.7224879264831543, "learning_rate": 1e-05, "loss": 0.1501, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8375505157126486, "eval_loss": 0.32325395941734314, "eval_precision": 0.8359243697478991, "eval_recall": 0.8392434988179669, "eval_runtime": 5.1275, "eval_samples_per_second": 77.816, "eval_steps_per_second": 9.751, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.4223344326019287, "learning_rate": 7.5e-06, "loss": 0.1468, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8385441718775052, "eval_loss": 0.32956239581108093, "eval_precision": 0.8412280701754387, "eval_recall": 0.8360156392071285, "eval_runtime": 5.1268, "eval_samples_per_second": 77.826, "eval_steps_per_second": 9.753, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.574972152709961, "learning_rate": 5e-06, "loss": 0.1423, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8342105263157895, "eval_loss": 0.3366738557815552, "eval_precision": 0.8398085585585586, "eval_recall": 0.82924168030551, "eval_runtime": 5.1034, "eval_samples_per_second": 78.183, "eval_steps_per_second": 9.797, "step": 2196 }, { "epoch": 19.0, "grad_norm": 7.210375785827637, "learning_rate": 2.5e-06, "loss": 0.1327, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8419946387230413, "eval_loss": 0.3395027816295624, "eval_precision": 0.8437691365584814, "eval_recall": 0.8402891434806329, "eval_runtime": 5.1186, "eval_samples_per_second": 77.952, "eval_steps_per_second": 9.768, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.7272565364837646, "learning_rate": 0.0, "loss": 0.1413, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8438308224802573, "eval_loss": 0.34336015582084656, "eval_precision": 0.8485409407665505, "eval_recall": 0.8395617384979087, "eval_runtime": 5.1504, "eval_samples_per_second": 77.47, "eval_steps_per_second": 9.708, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.22777412445818793, "train_runtime": 1952.8082, "train_samples_per_second": 37.259, "train_steps_per_second": 1.249 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }