{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.585148334503174, "learning_rate": 4.75e-05, "loss": 0.5413, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7368421052631579, "eval_f1": 0.6531440162271805, "eval_loss": 0.49492353200912476, "eval_precision": 0.6762575228471654, "eval_recall": 0.6437988725222767, "eval_runtime": 1.7908, "eval_samples_per_second": 222.81, "eval_steps_per_second": 27.921, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.6699981689453125, "learning_rate": 4.5e-05, "loss": 0.4306, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.8018811712631937, "eval_loss": 0.3954322338104248, "eval_precision": 0.7902494331065759, "eval_recall": 0.825877432260411, "eval_runtime": 1.8012, "eval_samples_per_second": 221.517, "eval_steps_per_second": 27.759, "step": 244 }, { "epoch": 3.0, "grad_norm": 2.988449811935425, "learning_rate": 4.25e-05, "loss": 0.3344, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8099340368327992, "eval_loss": 0.33969220519065857, "eval_precision": 0.8370422043948378, "eval_recall": 0.7928714311693035, "eval_runtime": 1.8043, "eval_samples_per_second": 221.138, "eval_steps_per_second": 27.712, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.6507456302642822, "learning_rate": 4e-05, "loss": 0.2925, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8058239259643384, "eval_loss": 0.32114899158477783, "eval_precision": 0.8264119601328903, "eval_recall": 0.7918257865066376, "eval_runtime": 1.7976, "eval_samples_per_second": 221.96, "eval_steps_per_second": 27.814, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.9831173419952393, "learning_rate": 3.7500000000000003e-05, "loss": 0.2794, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8365204824303285, "eval_loss": 0.30638387799263, "eval_precision": 0.8313636363636363, "eval_recall": 0.8424713584288053, "eval_runtime": 1.8026, "eval_samples_per_second": 221.352, "eval_steps_per_second": 27.738, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.175257444381714, "learning_rate": 3.5e-05, "loss": 0.2464, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8453047161322887, "eval_loss": 0.2856903374195099, "eval_precision": 0.835631596867552, "eval_recall": 0.8585197308601564, "eval_runtime": 1.8028, "eval_samples_per_second": 221.325, "eval_steps_per_second": 27.735, "step": 732 }, { "epoch": 7.0, "grad_norm": 3.4121947288513184, "learning_rate": 3.2500000000000004e-05, "loss": 0.2332, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8536612749904566, "eval_loss": 0.28461554646492004, "eval_precision": 0.8496330709593418, "eval_recall": 0.8581105655573741, "eval_runtime": 1.8, "eval_samples_per_second": 221.663, "eval_steps_per_second": 27.777, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.52989387512207, "learning_rate": 3e-05, "loss": 0.2216, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8262195121951219, "eval_loss": 0.2906416654586792, "eval_precision": 0.8360165151709128, "eval_recall": 0.8181942171303873, "eval_runtime": 1.8056, "eval_samples_per_second": 220.983, "eval_steps_per_second": 27.692, "step": 976 }, { "epoch": 9.0, "grad_norm": 8.88005256652832, "learning_rate": 2.7500000000000004e-05, "loss": 0.2123, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8386324041811847, "eval_loss": 0.2781093120574951, "eval_precision": 0.8487869670976828, "eval_recall": 0.830287324968176, "eval_runtime": 1.8334, "eval_samples_per_second": 217.634, "eval_steps_per_second": 27.272, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.3229901790618896, "learning_rate": 2.5e-05, "loss": 0.1911, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8395201930584144, "eval_loss": 0.2896389365196228, "eval_precision": 0.8562091503267973, "eval_recall": 0.8270594653573378, "eval_runtime": 1.8031, "eval_samples_per_second": 221.286, "eval_steps_per_second": 27.73, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.697758138179779, "learning_rate": 2.25e-05, "loss": 0.1878, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8495838108450449, "eval_loss": 0.28144657611846924, "eval_precision": 0.8478991596638655, "eval_recall": 0.8513366066557555, "eval_runtime": 1.8197, "eval_samples_per_second": 219.261, "eval_steps_per_second": 27.476, "step": 1342 }, { "epoch": 12.0, "grad_norm": 1.104419231414795, "learning_rate": 2e-05, "loss": 0.1797, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8393634395533442, "eval_loss": 0.28299498558044434, "eval_precision": 0.8402278542707444, "eval_recall": 0.8385160938352427, "eval_runtime": 1.8064, "eval_samples_per_second": 220.882, "eval_steps_per_second": 27.679, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.5454129576683044, "learning_rate": 1.75e-05, "loss": 0.1746, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8332268672959993, "eval_loss": 0.2900232672691345, "eval_precision": 0.849623687858982, "eval_recall": 0.8210129114384433, "eval_runtime": 1.8092, "eval_samples_per_second": 220.538, "eval_steps_per_second": 27.636, "step": 1586 }, { "epoch": 14.0, "grad_norm": 6.253016471862793, "learning_rate": 1.5e-05, "loss": 0.1677, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.844327731092437, "eval_loss": 0.27983638644218445, "eval_precision": 0.8411320530352577, "eval_recall": 0.8477905073649754, "eval_runtime": 1.8045, "eval_samples_per_second": 221.113, "eval_steps_per_second": 27.708, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.9090829491615295, "learning_rate": 1.25e-05, "loss": 0.1585, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8476882658063937, "eval_loss": 0.28229421377182007, "eval_precision": 0.8437296561519796, "eval_recall": 0.8520640116384797, "eval_runtime": 1.8037, "eval_samples_per_second": 221.212, "eval_steps_per_second": 27.721, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.9292184114456177, "learning_rate": 1e-05, "loss": 0.1575, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8511422740140892, "eval_loss": 0.2815591096878052, "eval_precision": 0.8412921348314606, "eval_recall": 0.8645662847790507, "eval_runtime": 1.8158, "eval_samples_per_second": 219.738, "eval_steps_per_second": 27.536, "step": 1952 }, { "epoch": 17.0, "grad_norm": 4.232359409332275, "learning_rate": 7.5e-06, "loss": 0.146, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8359175094431583, "eval_loss": 0.30267345905303955, "eval_precision": 0.8376607470912432, "eval_recall": 0.8342425895617385, "eval_runtime": 1.8565, "eval_samples_per_second": 214.919, "eval_steps_per_second": 26.932, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.907904624938965, "learning_rate": 5e-06, "loss": 0.1368, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8424651921601347, "eval_loss": 0.29612866044044495, "eval_precision": 0.8372140762463343, "eval_recall": 0.8485179123476996, "eval_runtime": 1.8011, "eval_samples_per_second": 221.536, "eval_steps_per_second": 27.761, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.788642168045044, "learning_rate": 2.5e-06, "loss": 0.133, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8333016825553572, "eval_loss": 0.30237412452697754, "eval_precision": 0.8341507249908615, "eval_recall": 0.8324695399163484, "eval_runtime": 1.8062, "eval_samples_per_second": 220.905, "eval_steps_per_second": 27.682, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.534482479095459, "learning_rate": 0.0, "loss": 0.1377, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8359175094431583, "eval_loss": 0.30191537737846375, "eval_precision": 0.8376607470912432, "eval_recall": 0.8342425895617385, "eval_runtime": 1.8052, "eval_samples_per_second": 221.033, "eval_steps_per_second": 27.698, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2281026918379987, "train_runtime": 636.1271, "train_samples_per_second": 114.38, "train_steps_per_second": 3.836 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }