{ "best_metric": 0.6607086658477783, "best_model_checkpoint": "verbnet_train/checkpoint-114510", "epoch": 12.0, "eval_steps": 500, "global_step": 124920, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 13.90185260772705, "learning_rate": 1.960034582132565e-05, "loss": 1.9688, "step": 10410 }, { "epoch": 1.0, "eval_loss": 1.0218104124069214, "eval_runtime": 27.1535, "eval_samples_per_second": 161.453, "eval_steps_per_second": 20.182, "step": 10410 }, { "epoch": 2.0, "grad_norm": 35.40193176269531, "learning_rate": 1.9200499519692607e-05, "loss": 0.9642, "step": 20820 }, { "epoch": 2.0, "eval_loss": 0.8278869986534119, "eval_runtime": 27.1826, "eval_samples_per_second": 161.28, "eval_steps_per_second": 20.16, "step": 20820 }, { "epoch": 3.0, "grad_norm": 0.9018324613571167, "learning_rate": 1.8800653218059558e-05, "loss": 0.7979, "step": 31230 }, { "epoch": 3.0, "eval_loss": 0.7543942928314209, "eval_runtime": 27.2955, "eval_samples_per_second": 160.612, "eval_steps_per_second": 20.077, "step": 31230 }, { "epoch": 4.0, "grad_norm": 10.808300018310547, "learning_rate": 1.840073006724304e-05, "loss": 0.7172, "step": 41640 }, { "epoch": 4.0, "eval_loss": 0.7305750250816345, "eval_runtime": 27.2998, "eval_samples_per_second": 160.587, "eval_steps_per_second": 20.073, "step": 41640 }, { "epoch": 5.0, "grad_norm": 2.197155714035034, "learning_rate": 1.8000922190201732e-05, "loss": 0.6632, "step": 52050 }, { "epoch": 5.0, "eval_loss": 0.7061994671821594, "eval_runtime": 27.2802, "eval_samples_per_second": 160.703, "eval_steps_per_second": 20.088, "step": 52050 }, { "epoch": 6.0, "grad_norm": 6.914452075958252, "learning_rate": 1.7601075888568687e-05, "loss": 0.6295, "step": 62460 }, { "epoch": 6.0, "eval_loss": 0.6925345659255981, "eval_runtime": 27.2037, "eval_samples_per_second": 161.154, "eval_steps_per_second": 20.144, "step": 62460 }, { "epoch": 7.0, "grad_norm": 0.11944844573736191, "learning_rate": 1.720126801152738e-05, "loss": 0.5954, "step": 72870 }, { "epoch": 7.0, "eval_loss": 0.6798752546310425, "eval_runtime": 27.2603, "eval_samples_per_second": 160.82, "eval_steps_per_second": 20.103, "step": 72870 }, { "epoch": 8.0, "grad_norm": 10.490527153015137, "learning_rate": 1.6801383285302596e-05, "loss": 0.5732, "step": 83280 }, { "epoch": 8.0, "eval_loss": 0.6673381924629211, "eval_runtime": 27.2557, "eval_samples_per_second": 160.847, "eval_steps_per_second": 20.106, "step": 83280 }, { "epoch": 9.0, "grad_norm": 3.331533193588257, "learning_rate": 1.6401498559077812e-05, "loss": 0.5513, "step": 93690 }, { "epoch": 9.0, "eval_loss": 0.6665173172950745, "eval_runtime": 27.2846, "eval_samples_per_second": 160.677, "eval_steps_per_second": 20.085, "step": 93690 }, { "epoch": 10.0, "grad_norm": 10.890799522399902, "learning_rate": 1.6001613832853028e-05, "loss": 0.529, "step": 104100 }, { "epoch": 10.0, "eval_loss": 0.6729404330253601, "eval_runtime": 27.2246, "eval_samples_per_second": 161.031, "eval_steps_per_second": 20.129, "step": 104100 }, { "epoch": 11.0, "grad_norm": 2.3458809852600098, "learning_rate": 1.5601729106628244e-05, "loss": 0.5073, "step": 114510 }, { "epoch": 11.0, "eval_loss": 0.6607086658477783, "eval_runtime": 27.203, "eval_samples_per_second": 161.158, "eval_steps_per_second": 20.145, "step": 114510 }, { "epoch": 12.0, "grad_norm": 0.09025446325540543, "learning_rate": 1.5201844380403458e-05, "loss": 0.4939, "step": 124920 }, { "epoch": 12.0, "eval_loss": 0.6753981113433838, "eval_runtime": 27.427, "eval_samples_per_second": 159.842, "eval_steps_per_second": 19.98, "step": 124920 } ], "logging_steps": 500, "max_steps": 520500, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1090149659599789e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }