{ "best_metric": 0.7896053362979433, "best_model_checkpoint": "tinybert-TG-HS-HX-parentpretrained\\run-6\\checkpoint-1182", "epoch": 6.0, "eval_steps": 500, "global_step": 1182, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.0611700032566007e-05, "loss": 0.1259, "step": 197 }, { "epoch": 1.0, "eval_accuracy": 0.7536131183991106, "eval_loss": 0.12161022424697876, "eval_runtime": 125.6824, "eval_samples_per_second": 57.255, "eval_steps_per_second": 0.454, "step": 197 }, { "epoch": 2.0, "learning_rate": 9.095742885056578e-06, "loss": 0.1236, "step": 394 }, { "epoch": 2.0, "eval_accuracy": 0.7745969983324069, "eval_loss": 0.12096704542636871, "eval_runtime": 125.4944, "eval_samples_per_second": 57.341, "eval_steps_per_second": 0.454, "step": 394 }, { "epoch": 3.0, "learning_rate": 7.579785737547148e-06, "loss": 0.1229, "step": 591 }, { "epoch": 3.0, "eval_accuracy": 0.7848804891606448, "eval_loss": 0.12066732347011566, "eval_runtime": 125.5947, "eval_samples_per_second": 57.295, "eval_steps_per_second": 0.454, "step": 591 }, { "epoch": 4.0, "learning_rate": 6.063828590037718e-06, "loss": 0.1224, "step": 788 }, { "epoch": 4.0, "eval_accuracy": 0.7804335742078933, "eval_loss": 0.12036041915416718, "eval_runtime": 125.5933, "eval_samples_per_second": 57.296, "eval_steps_per_second": 0.454, "step": 788 }, { "epoch": 5.0, "learning_rate": 4.547871442528289e-06, "loss": 0.1221, "step": 985 }, { "epoch": 5.0, "eval_accuracy": 0.7887715397443024, "eval_loss": 0.12023670971393585, "eval_runtime": 125.4555, "eval_samples_per_second": 57.359, "eval_steps_per_second": 0.454, "step": 985 }, { "epoch": 6.0, "learning_rate": 3.031914295018859e-06, "loss": 0.1218, "step": 1182 }, { "epoch": 6.0, "eval_accuracy": 0.7896053362979433, "eval_loss": 0.12014192342758179, "eval_runtime": 125.3237, "eval_samples_per_second": 57.419, "eval_steps_per_second": 0.455, "step": 1182 } ], "logging_steps": 500, "max_steps": 1576, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 265501203931764.0, "trial_name": null, "trial_params": { "alpha": 0.18717092492624732, "learning_rate": 1.2127657180075437e-05, "num_train_epochs": 8, "temperature": 5 } }