{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.7894608974456787, "learning_rate": 4.75e-05, "loss": 0.5472, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7343358395989975, "eval_f1": 0.6338815789473684, "eval_loss": 0.4992983341217041, "eval_precision": 0.6726405580300865, "eval_recall": 0.6245226404800873, "eval_runtime": 5.1909, "eval_samples_per_second": 76.865, "eval_steps_per_second": 9.632, "step": 122 }, { "epoch": 2.0, "grad_norm": 5.12828254699707, "learning_rate": 4.5e-05, "loss": 0.4484, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.7744208494208494, "eval_loss": 0.4156816005706787, "eval_precision": 0.765545388374753, "eval_recall": 0.8096017457719585, "eval_runtime": 5.1989, "eval_samples_per_second": 76.746, "eval_steps_per_second": 9.617, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.4368088245391846, "learning_rate": 4.25e-05, "loss": 0.3338, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8179269882659713, "eval_loss": 0.32789668440818787, "eval_precision": 0.8510239760239761, "eval_recall": 0.7981905801054737, "eval_runtime": 5.1341, "eval_samples_per_second": 77.715, "eval_steps_per_second": 9.739, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.854862689971924, "learning_rate": 4e-05, "loss": 0.2902, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8359744037230948, "eval_loss": 0.30365845561027527, "eval_precision": 0.8448835433371515, "eval_recall": 0.828514275322786, "eval_runtime": 5.0662, "eval_samples_per_second": 78.758, "eval_steps_per_second": 9.869, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.4754557609558105, "learning_rate": 3.7500000000000003e-05, "loss": 0.2756, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8480717680029244, "eval_loss": 0.292182594537735, "eval_precision": 0.8498775260257195, "eval_recall": 0.8463356973995272, "eval_runtime": 5.091, "eval_samples_per_second": 78.374, "eval_steps_per_second": 9.821, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.3565938472747803, "learning_rate": 3.5e-05, "loss": 0.2514, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8446181767415888, "eval_loss": 0.3059082329273224, "eval_precision": 0.835902201887332, "eval_recall": 0.8560192762320422, "eval_runtime": 5.1647, "eval_samples_per_second": 77.255, "eval_steps_per_second": 9.681, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.7644256949424744, "learning_rate": 3.2500000000000004e-05, "loss": 0.2338, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8347043853938569, "eval_loss": 0.2969984710216522, "eval_precision": 0.8277993283927745, "eval_recall": 0.8431987634115294, "eval_runtime": 5.1318, "eval_samples_per_second": 77.751, "eval_steps_per_second": 9.743, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.6158833503723145, "learning_rate": 3e-05, "loss": 0.2205, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8531398028421457, "eval_loss": 0.29671400785446167, "eval_precision": 0.8783539291322455, "eval_recall": 0.835924713584288, "eval_runtime": 5.1445, "eval_samples_per_second": 77.558, "eval_steps_per_second": 9.719, "step": 976 }, { "epoch": 9.0, "grad_norm": 7.319582462310791, "learning_rate": 2.7500000000000004e-05, "loss": 0.2153, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8401647707947546, "eval_loss": 0.29821664094924927, "eval_precision": 0.8393298751432535, "eval_recall": 0.8410165484633569, "eval_runtime": 5.1158, "eval_samples_per_second": 77.993, "eval_steps_per_second": 9.774, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.4924864768981934, "learning_rate": 2.5e-05, "loss": 0.1969, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8377065410088949, "eval_loss": 0.2942851483821869, "eval_precision": 0.8423344947735192, "eval_recall": 0.8335151845790143, "eval_runtime": 5.1603, "eval_samples_per_second": 77.322, "eval_steps_per_second": 9.689, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.3620827198028564, "learning_rate": 2.25e-05, "loss": 0.185, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8375505157126486, "eval_loss": 0.29727619886398315, "eval_precision": 0.8359243697478991, "eval_recall": 0.8392434988179669, "eval_runtime": 9.0436, "eval_samples_per_second": 44.12, "eval_steps_per_second": 5.529, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.206456661224365, "learning_rate": 2e-05, "loss": 0.1733, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8377065410088949, "eval_loss": 0.3074239492416382, "eval_precision": 0.8423344947735192, "eval_recall": 0.8335151845790143, "eval_runtime": 5.1279, "eval_samples_per_second": 77.81, "eval_steps_per_second": 9.751, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.582509994506836, "learning_rate": 1.75e-05, "loss": 0.1616, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8403508771929824, "eval_loss": 0.31861984729766846, "eval_precision": 0.8460491741741742, "eval_recall": 0.8352882342244045, "eval_runtime": 5.1383, "eval_samples_per_second": 77.652, "eval_steps_per_second": 9.731, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.573276519775391, "learning_rate": 1.5e-05, "loss": 0.16, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8347043853938569, "eval_loss": 0.32218077778816223, "eval_precision": 0.8277993283927745, "eval_recall": 0.8431987634115294, "eval_runtime": 5.0708, "eval_samples_per_second": 78.686, "eval_steps_per_second": 9.86, "step": 1708 }, { "epoch": 15.0, "grad_norm": 4.897996425628662, "learning_rate": 1.25e-05, "loss": 0.1494, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8464912280701754, "eval_loss": 0.3260069787502289, "eval_precision": 0.8522897897897899, "eval_recall": 0.8413347881432988, "eval_runtime": 5.1509, "eval_samples_per_second": 77.462, "eval_steps_per_second": 9.707, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.7224879264831543, "learning_rate": 1e-05, "loss": 0.1501, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8375505157126486, "eval_loss": 0.32325395941734314, "eval_precision": 0.8359243697478991, "eval_recall": 0.8392434988179669, "eval_runtime": 5.1583, "eval_samples_per_second": 77.352, "eval_steps_per_second": 9.693, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.4223344326019287, "learning_rate": 7.5e-06, "loss": 0.1468, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8385441718775052, "eval_loss": 0.32956239581108093, "eval_precision": 0.8412280701754387, "eval_recall": 0.8360156392071285, "eval_runtime": 5.0878, "eval_samples_per_second": 78.423, "eval_steps_per_second": 9.827, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.574972152709961, "learning_rate": 5e-06, "loss": 0.1423, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8342105263157895, "eval_loss": 0.3366738557815552, "eval_precision": 0.8398085585585586, "eval_recall": 0.82924168030551, "eval_runtime": 5.1002, "eval_samples_per_second": 78.232, "eval_steps_per_second": 9.804, "step": 2196 }, { "epoch": 19.0, "grad_norm": 7.210375785827637, "learning_rate": 2.5e-06, "loss": 0.1327, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8419946387230413, "eval_loss": 0.3395027816295624, "eval_precision": 0.8437691365584814, "eval_recall": 0.8402891434806329, "eval_runtime": 5.143, "eval_samples_per_second": 77.581, "eval_steps_per_second": 9.722, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.7272565364837646, "learning_rate": 0.0, "loss": 0.1413, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8438308224802573, "eval_loss": 0.34336015582084656, "eval_precision": 0.8485409407665505, "eval_recall": 0.8395617384979087, "eval_runtime": 5.0731, "eval_samples_per_second": 78.65, "eval_steps_per_second": 9.856, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.22777412445818793, "train_runtime": 1962.6343, "train_samples_per_second": 37.073, "train_steps_per_second": 1.243 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }