{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.6188464164733887, "learning_rate": 4.75e-05, "loss": 0.5579, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7092731829573935, "eval_f1": 0.667786391042205, "eval_loss": 0.5389899015426636, "eval_precision": 0.6626257628236847, "eval_recall": 0.6793053282414985, "eval_runtime": 1.7107, "eval_samples_per_second": 233.243, "eval_steps_per_second": 29.228, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.044891595840454, "learning_rate": 4.5e-05, "loss": 0.5043, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7644110275689223, "eval_f1": 0.6425003812719231, "eval_loss": 0.48353031277656555, "eval_precision": 0.7516326530612245, "eval_recall": 0.6307965084560829, "eval_runtime": 1.7072, "eval_samples_per_second": 233.71, "eval_steps_per_second": 29.287, "step": 244 }, { "epoch": 3.0, "grad_norm": 6.0588178634643555, "learning_rate": 4.25e-05, "loss": 0.4819, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7769423558897243, "eval_f1": 0.7150114361381967, "eval_loss": 0.45847204327583313, "eval_precision": 0.7321575907590759, "eval_recall": 0.7046735770140026, "eval_runtime": 1.7067, "eval_samples_per_second": 233.787, "eval_steps_per_second": 29.297, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.7940452098846436, "learning_rate": 4e-05, "loss": 0.4474, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7819548872180451, "eval_f1": 0.7471685470185073, "eval_loss": 0.45870429277420044, "eval_precision": 0.7399324134046747, "eval_recall": 0.7582287688670667, "eval_runtime": 1.7057, "eval_samples_per_second": 233.927, "eval_steps_per_second": 29.314, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.552182197570801, "learning_rate": 3.7500000000000003e-05, "loss": 0.4336, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8070175438596491, "eval_f1": 0.7504244482173175, "eval_loss": 0.42432549595832825, "eval_precision": 0.7756337629230987, "eval_recall": 0.7359519912711402, "eval_runtime": 1.7092, "eval_samples_per_second": 233.446, "eval_steps_per_second": 29.254, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.7669365406036377, "learning_rate": 3.5e-05, "loss": 0.4036, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8220551378446115, "eval_f1": 0.7889628045918786, "eval_loss": 0.39900773763656616, "eval_precision": 0.7845601173020528, "eval_recall": 0.7940989270776504, "eval_runtime": 1.7065, "eval_samples_per_second": 233.816, "eval_steps_per_second": 29.3, "step": 732 }, { "epoch": 7.0, "grad_norm": 3.482671022415161, "learning_rate": 3.2500000000000004e-05, "loss": 0.3871, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.7917273014868713, "eval_loss": 0.38430532813072205, "eval_precision": 0.8074456774536514, "eval_recall": 0.780460083651573, "eval_runtime": 1.7068, "eval_samples_per_second": 233.771, "eval_steps_per_second": 29.295, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.953066825866699, "learning_rate": 3e-05, "loss": 0.3704, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.7838695967399185, "eval_loss": 0.37806496024131775, "eval_precision": 0.8269841269841269, "eval_recall": 0.7622294962720495, "eval_runtime": 1.7054, "eval_samples_per_second": 233.968, "eval_steps_per_second": 29.319, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.039283752441406, "learning_rate": 2.7500000000000004e-05, "loss": 0.3563, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.7958506634977223, "eval_loss": 0.3728295564651489, "eval_precision": 0.8343280282935455, "eval_recall": 0.7750500090925623, "eval_runtime": 1.7068, "eval_samples_per_second": 233.766, "eval_steps_per_second": 29.294, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.432938098907471, "learning_rate": 2.5e-05, "loss": 0.34, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8262195121951219, "eval_loss": 0.354525089263916, "eval_precision": 0.8360165151709128, "eval_recall": 0.8181942171303873, "eval_runtime": 1.7046, "eval_samples_per_second": 234.074, "eval_steps_per_second": 29.333, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.296050548553467, "learning_rate": 2.25e-05, "loss": 0.3394, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8245369048813042, "eval_loss": 0.34459224343299866, "eval_precision": 0.8310003145643283, "eval_recall": 0.8189216221131115, "eval_runtime": 1.7032, "eval_samples_per_second": 234.258, "eval_steps_per_second": 29.356, "step": 1342 }, { "epoch": 12.0, "grad_norm": 3.527177572250366, "learning_rate": 2e-05, "loss": 0.3182, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8242843661528783, "eval_loss": 0.3410705029964447, "eval_precision": 0.8389366308055628, "eval_recall": 0.8131933078741589, "eval_runtime": 1.7024, "eval_samples_per_second": 234.371, "eval_steps_per_second": 29.37, "step": 1464 }, { "epoch": 13.0, "grad_norm": 3.337897777557373, "learning_rate": 1.75e-05, "loss": 0.3226, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8237632508833923, "eval_loss": 0.33531269431114197, "eval_precision": 0.8254439681567667, "eval_recall": 0.8221494817239499, "eval_runtime": 1.7031, "eval_samples_per_second": 234.283, "eval_steps_per_second": 29.359, "step": 1586 }, { "epoch": 14.0, "grad_norm": 5.108224868774414, "learning_rate": 1.5e-05, "loss": 0.3181, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8271953405017921, "eval_loss": 0.33688613772392273, "eval_precision": 0.8228172499116295, "eval_recall": 0.8321513002364066, "eval_runtime": 1.7034, "eval_samples_per_second": 234.241, "eval_steps_per_second": 29.354, "step": 1708 }, { "epoch": 15.0, "grad_norm": 4.430032253265381, "learning_rate": 1.25e-05, "loss": 0.3044, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.3312358856201172, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 1.7023, "eval_samples_per_second": 234.391, "eval_steps_per_second": 29.372, "step": 1830 }, { "epoch": 16.0, "grad_norm": 4.105480670928955, "learning_rate": 1e-05, "loss": 0.3038, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8281017346283209, "eval_loss": 0.3286840319633484, "eval_precision": 0.8272965800108572, "eval_recall": 0.8289234406255683, "eval_runtime": 1.7022, "eval_samples_per_second": 234.402, "eval_steps_per_second": 29.374, "step": 1952 }, { "epoch": 17.0, "grad_norm": 6.878963470458984, "learning_rate": 7.5e-06, "loss": 0.3033, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8323529411764705, "eval_loss": 0.32676100730895996, "eval_precision": 0.8292704679231822, "eval_recall": 0.8356973995271868, "eval_runtime": 1.7037, "eval_samples_per_second": 234.194, "eval_steps_per_second": 29.348, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.2083144187927246, "learning_rate": 5e-06, "loss": 0.3018, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8289446964056049, "eval_loss": 0.3250683546066284, "eval_precision": 0.8266129032258065, "eval_recall": 0.8314238952536825, "eval_runtime": 1.7036, "eval_samples_per_second": 234.208, "eval_steps_per_second": 29.349, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.615198612213135, "learning_rate": 2.5e-06, "loss": 0.2955, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8281017346283209, "eval_loss": 0.3252684772014618, "eval_precision": 0.8272965800108572, "eval_recall": 0.8289234406255683, "eval_runtime": 1.7036, "eval_samples_per_second": 234.205, "eval_steps_per_second": 29.349, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.056838035583496, "learning_rate": 0.0, "loss": 0.2999, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8167483159828537, "eval_loss": 0.32519420981407166, "eval_precision": 0.8201621387462095, "eval_recall": 0.8136024731769412, "eval_runtime": 1.7026, "eval_samples_per_second": 234.342, "eval_steps_per_second": 29.366, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7618923371160000.0, "train_loss": 0.3694801080422323, "train_runtime": 613.6291, "train_samples_per_second": 118.801, "train_steps_per_second": 3.976 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7618923371160000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }