{ "best_metric": 0.29289329051971436, "best_model_checkpoint": "/content/aptner_deberta/checkpoint-1000", "epoch": 10.0, "eval_steps": 500, "global_step": 8430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "learning_rate": 1.881376037959668e-05, "loss": 0.6136, "step": 500 }, { "epoch": 0.59, "eval_accuracy": 0.9171865696756323, "eval_f1": 0.5143439638852483, "eval_loss": 0.3298434615135193, "eval_precision": 0.5007088176920896, "eval_recall": 0.5287425149700599, "eval_runtime": 11.2485, "eval_samples_per_second": 155.488, "eval_steps_per_second": 19.469, "step": 500 }, { "epoch": 1.19, "learning_rate": 1.762752075919336e-05, "loss": 0.308, "step": 1000 }, { "epoch": 1.19, "eval_accuracy": 0.9205001423800772, "eval_f1": 0.5688849970811442, "eval_loss": 0.29289329051971436, "eval_precision": 0.5549544419134397, "eval_recall": 0.5835329341317366, "eval_runtime": 11.4351, "eval_samples_per_second": 152.95, "eval_steps_per_second": 19.152, "step": 1000 }, { "epoch": 1.78, "learning_rate": 1.6441281138790037e-05, "loss": 0.2428, "step": 1500 }, { "epoch": 1.78, "eval_accuracy": 0.9176525408371948, "eval_f1": 0.5728531855955679, "eval_loss": 0.31239837408065796, "eval_precision": 0.5329896907216495, "eval_recall": 0.6191616766467066, "eval_runtime": 11.5194, "eval_samples_per_second": 151.831, "eval_steps_per_second": 19.011, "step": 1500 }, { "epoch": 2.37, "learning_rate": 1.5255041518386714e-05, "loss": 0.2088, "step": 2000 }, { "epoch": 2.37, "eval_accuracy": 0.9146755028605452, "eval_f1": 0.5848287112561175, "eval_loss": 0.32038480043411255, "eval_precision": 0.5356075697211156, "eval_recall": 0.6440119760479042, "eval_runtime": 12.1642, "eval_samples_per_second": 143.783, "eval_steps_per_second": 18.004, "step": 2000 }, { "epoch": 2.97, "learning_rate": 1.4068801897983393e-05, "loss": 0.1783, "step": 2500 }, { "epoch": 2.97, "eval_accuracy": 0.9149343757280799, "eval_f1": 0.6023742830465519, "eval_loss": 0.3319493234157562, "eval_precision": 0.5431801780129901, "eval_recall": 0.6760479041916168, "eval_runtime": 11.6403, "eval_samples_per_second": 150.254, "eval_steps_per_second": 18.814, "step": 2500 }, { "epoch": 3.56, "learning_rate": 1.2882562277580073e-05, "loss": 0.1434, "step": 3000 }, { "epoch": 3.56, "eval_accuracy": 0.9202930440860494, "eval_f1": 0.6036738101864737, "eval_loss": 0.3370673358440399, "eval_precision": 0.5639625585023401, "eval_recall": 0.6494011976047904, "eval_runtime": 11.5779, "eval_samples_per_second": 151.064, "eval_steps_per_second": 18.915, "step": 3000 }, { "epoch": 4.15, "learning_rate": 1.169632265717675e-05, "loss": 0.1352, "step": 3500 }, { "epoch": 4.15, "eval_accuracy": 0.9135364622433922, "eval_f1": 0.5807708362320857, "eval_loss": 0.3826988637447357, "eval_precision": 0.5425006498570315, "eval_recall": 0.6248502994011976, "eval_runtime": 11.732, "eval_samples_per_second": 149.08, "eval_steps_per_second": 18.667, "step": 3500 }, { "epoch": 4.74, "learning_rate": 1.0510083036773429e-05, "loss": 0.1135, "step": 4000 }, { "epoch": 4.74, "eval_accuracy": 0.9135623495301457, "eval_f1": 0.5979081159804052, "eval_loss": 0.3862306475639343, "eval_precision": 0.5359601234274863, "eval_recall": 0.6760479041916168, "eval_runtime": 11.4556, "eval_samples_per_second": 152.676, "eval_steps_per_second": 19.117, "step": 4000 }, { "epoch": 5.34, "learning_rate": 9.323843416370107e-06, "loss": 0.0987, "step": 4500 }, { "epoch": 5.34, "eval_accuracy": 0.9141318698387222, "eval_f1": 0.5920873124147339, "eval_loss": 0.3977762758731842, "eval_precision": 0.543859649122807, "eval_recall": 0.6497005988023952, "eval_runtime": 11.4436, "eval_samples_per_second": 152.836, "eval_steps_per_second": 19.137, "step": 4500 }, { "epoch": 5.93, "learning_rate": 8.137603795966786e-06, "loss": 0.0942, "step": 5000 }, { "epoch": 5.93, "eval_accuracy": 0.9224934634600948, "eval_f1": 0.6091399375532217, "eval_loss": 0.37382081151008606, "eval_precision": 0.5790609821910415, "eval_recall": 0.6425149700598802, "eval_runtime": 12.0507, "eval_samples_per_second": 145.137, "eval_steps_per_second": 18.173, "step": 5000 }, { "epoch": 6.52, "learning_rate": 6.951364175563464e-06, "loss": 0.0746, "step": 5500 }, { "epoch": 6.52, "eval_accuracy": 0.9161251909187398, "eval_f1": 0.5943422136775611, "eval_loss": 0.4268680810928345, "eval_precision": 0.5489599188229325, "eval_recall": 0.6479041916167665, "eval_runtime": 11.3716, "eval_samples_per_second": 153.804, "eval_steps_per_second": 19.258, "step": 5500 }, { "epoch": 7.12, "learning_rate": 5.765124555160143e-06, "loss": 0.0727, "step": 6000 }, { "epoch": 7.12, "eval_accuracy": 0.9170571332418649, "eval_f1": 0.5977203224909646, "eval_loss": 0.4236016869544983, "eval_precision": 0.5578619615983393, "eval_recall": 0.6437125748502994, "eval_runtime": 11.9963, "eval_samples_per_second": 145.795, "eval_steps_per_second": 18.256, "step": 6000 }, { "epoch": 7.71, "learning_rate": 4.5788849347568215e-06, "loss": 0.0661, "step": 6500 }, { "epoch": 7.71, "eval_accuracy": 0.9200341712185146, "eval_f1": 0.603626220362622, "eval_loss": 0.4239303171634674, "eval_precision": 0.5650130548302872, "eval_recall": 0.6479041916167665, "eval_runtime": 12.2361, "eval_samples_per_second": 142.938, "eval_steps_per_second": 17.898, "step": 6500 }, { "epoch": 8.3, "learning_rate": 3.3926453143535e-06, "loss": 0.0578, "step": 7000 }, { "epoch": 8.3, "eval_accuracy": 0.9175231044034274, "eval_f1": 0.5931846865797223, "eval_loss": 0.44854551553726196, "eval_precision": 0.5579002901609074, "eval_recall": 0.6332335329341318, "eval_runtime": 12.2264, "eval_samples_per_second": 143.051, "eval_steps_per_second": 17.912, "step": 7000 }, { "epoch": 8.9, "learning_rate": 2.2064056939501782e-06, "loss": 0.0505, "step": 7500 }, { "epoch": 8.9, "eval_accuracy": 0.9162546273525072, "eval_f1": 0.5922411387105778, "eval_loss": 0.4552724361419678, "eval_precision": 0.5546262415054888, "eval_recall": 0.6353293413173653, "eval_runtime": 12.0192, "eval_samples_per_second": 145.517, "eval_steps_per_second": 18.221, "step": 7500 }, { "epoch": 9.49, "learning_rate": 1.0201660735468566e-06, "loss": 0.0513, "step": 8000 }, { "epoch": 9.49, "eval_accuracy": 0.9171347951021254, "eval_f1": 0.597688344241749, "eval_loss": 0.46287792921066284, "eval_precision": 0.5587086696172872, "eval_recall": 0.6425149700598802, "eval_runtime": 11.7878, "eval_samples_per_second": 148.374, "eval_steps_per_second": 18.579, "step": 8000 }, { "epoch": 10.0, "step": 8430, "total_flos": 2261553362554848.0, "train_loss": 0.15120792004278688, "train_runtime": 2180.3385, "train_samples_per_second": 30.931, "train_steps_per_second": 3.866 } ], "logging_steps": 500, "max_steps": 8430, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2261553362554848.0, "trial_name": null, "trial_params": null }