{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.8857197761535645, "learning_rate": 4.75e-05, "loss": 0.563, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6586454703832753, "eval_loss": 0.5137906670570374, "eval_precision": 0.6636154141595185, "eval_recall": 0.65493726132024, "eval_runtime": 5.0938, "eval_samples_per_second": 78.33, "eval_steps_per_second": 9.816, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.8023645877838135, "learning_rate": 4.5e-05, "loss": 0.509, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7167919799498746, "eval_f1": 0.68198649992594, "eval_loss": 0.5057324767112732, "eval_precision": 0.6763453815261045, "eval_recall": 0.6996272049463539, "eval_runtime": 5.0746, "eval_samples_per_second": 78.627, "eval_steps_per_second": 9.853, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.055728912353516, "learning_rate": 4.25e-05, "loss": 0.4924, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7393483709273183, "eval_f1": 0.6901433691756272, "eval_loss": 0.4707716703414917, "eval_precision": 0.6876546482856133, "eval_recall": 0.6930805601018367, "eval_runtime": 5.082, "eval_samples_per_second": 78.512, "eval_steps_per_second": 9.839, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.744323253631592, "learning_rate": 4e-05, "loss": 0.468, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7844611528822055, "eval_f1": 0.7286143625434989, "eval_loss": 0.4378769099712372, "eval_precision": 0.741162203468669, "eval_recall": 0.7199945444626296, "eval_runtime": 5.045, "eval_samples_per_second": 79.089, "eval_steps_per_second": 9.911, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.431583285331726, "learning_rate": 3.7500000000000003e-05, "loss": 0.4495, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7593984962406015, "eval_f1": 0.7313131313131314, "eval_loss": 0.44655734300613403, "eval_precision": 0.7233381157340986, "eval_recall": 0.7547735951991271, "eval_runtime": 5.0782, "eval_samples_per_second": 78.571, "eval_steps_per_second": 9.846, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.092684745788574, "learning_rate": 3.5e-05, "loss": 0.4334, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7887122892379951, "eval_loss": 0.40413278341293335, "eval_precision": 0.7926829268292683, "eval_recall": 0.7851427532278596, "eval_runtime": 5.0593, "eval_samples_per_second": 78.865, "eval_steps_per_second": 9.883, "step": 732 }, { "epoch": 7.0, "grad_norm": 2.259237289428711, "learning_rate": 3.2500000000000004e-05, "loss": 0.415, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.7994987468671679, "eval_f1": 0.765982404692082, "eval_loss": 0.4057486653327942, "eval_precision": 0.7590435228437963, "eval_recall": 0.7756410256410257, "eval_runtime": 5.1052, "eval_samples_per_second": 78.156, "eval_steps_per_second": 9.794, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.503615379333496, "learning_rate": 3e-05, "loss": 0.3974, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.7958954625621293, "eval_loss": 0.3851749002933502, "eval_precision": 0.7982456140350878, "eval_recall": 0.7936897617748682, "eval_runtime": 5.0641, "eval_samples_per_second": 78.79, "eval_steps_per_second": 9.873, "step": 976 }, { "epoch": 9.0, "grad_norm": 3.705313205718994, "learning_rate": 2.7500000000000004e-05, "loss": 0.3849, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.7894173351830629, "eval_loss": 0.3829491138458252, "eval_precision": 0.7880252100840336, "eval_recall": 0.7908710674668122, "eval_runtime": 5.052, "eval_samples_per_second": 78.978, "eval_steps_per_second": 9.897, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.997393608093262, "learning_rate": 2.5e-05, "loss": 0.3771, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8065102745953809, "eval_loss": 0.3785531520843506, "eval_precision": 0.8065102745953809, "eval_recall": 0.8065102745953809, "eval_runtime": 5.0689, "eval_samples_per_second": 78.715, "eval_steps_per_second": 9.864, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.873344659805298, "learning_rate": 2.25e-05, "loss": 0.3633, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7992838965496833, "eval_loss": 0.3843457102775574, "eval_precision": 0.7931478693839741, "eval_recall": 0.8069194398981633, "eval_runtime": 5.0454, "eval_samples_per_second": 79.082, "eval_steps_per_second": 9.91, "step": 1342 }, { "epoch": 12.0, "grad_norm": 10.848413467407227, "learning_rate": 2e-05, "loss": 0.3591, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7992838965496833, "eval_loss": 0.3832751214504242, "eval_precision": 0.7931478693839741, "eval_recall": 0.8069194398981633, "eval_runtime": 5.0453, "eval_samples_per_second": 79.083, "eval_steps_per_second": 9.91, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.711672306060791, "learning_rate": 1.75e-05, "loss": 0.354, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8065102745953809, "eval_loss": 0.3705191910266876, "eval_precision": 0.8065102745953809, "eval_recall": 0.8065102745953809, "eval_runtime": 5.0497, "eval_samples_per_second": 79.015, "eval_steps_per_second": 9.902, "step": 1586 }, { "epoch": 14.0, "grad_norm": 8.37232494354248, "learning_rate": 1.5e-05, "loss": 0.3451, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8049369344976196, "eval_loss": 0.37085026502609253, "eval_precision": 0.8027777777777778, "eval_recall": 0.8072376795781051, "eval_runtime": 5.0674, "eval_samples_per_second": 78.738, "eval_steps_per_second": 9.867, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1326128244400024, "learning_rate": 1.25e-05, "loss": 0.3403, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.8026866442779643, "eval_loss": 0.3732873201370239, "eval_precision": 0.795995733394834, "eval_recall": 0.8111929441716675, "eval_runtime": 5.1259, "eval_samples_per_second": 77.841, "eval_steps_per_second": 9.754, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.462644100189209, "learning_rate": 1e-05, "loss": 0.3282, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.8060710498409331, "eval_loss": 0.3714647889137268, "eval_precision": 0.7988372093023256, "eval_recall": 0.8154664484451719, "eval_runtime": 5.0456, "eval_samples_per_second": 79.079, "eval_steps_per_second": 9.91, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.0335631370544434, "learning_rate": 7.5e-06, "loss": 0.3286, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.7999041923338897, "eval_loss": 0.36644819378852844, "eval_precision": 0.7965023376930815, "eval_recall": 0.803691580287325, "eval_runtime": 5.0509, "eval_samples_per_second": 78.996, "eval_steps_per_second": 9.899, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.8270199298858643, "learning_rate": 5e-06, "loss": 0.3348, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7949075143216848, "eval_loss": 0.3669916093349457, "eval_precision": 0.7904105571847508, "eval_recall": 0.8001454809965449, "eval_runtime": 5.0497, "eval_samples_per_second": 79.014, "eval_steps_per_second": 9.902, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.275764465332031, "learning_rate": 2.5e-06, "loss": 0.325, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.801779557335113, "eval_loss": 0.36691346764564514, "eval_precision": 0.7960927960927962, "eval_recall": 0.8086924895435534, "eval_runtime": 5.0733, "eval_samples_per_second": 78.646, "eval_steps_per_second": 9.855, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.228915691375732, "learning_rate": 0.0, "loss": 0.3266, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.801779557335113, "eval_loss": 0.3671587407588959, "eval_precision": 0.7960927960927962, "eval_recall": 0.8086924895435534, "eval_runtime": 5.1045, "eval_samples_per_second": 78.166, "eval_steps_per_second": 9.795, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7590599775312000.0, "train_loss": 0.3947384412171411, "train_runtime": 1950.9891, "train_samples_per_second": 37.294, "train_steps_per_second": 1.251 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7590599775312000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }