{ "best_metric": 0.27383747696876526, "best_model_checkpoint": "models/roberta-teacher/checkpoint-8346", "epoch": 4.0, "global_step": 16692, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 1.952072849269111e-05, "loss": 0.4076, "step": 500 }, { "epoch": 0.24, "learning_rate": 1.904145698538222e-05, "loss": 0.3484, "step": 1000 }, { "epoch": 0.36, "learning_rate": 1.856218547807333e-05, "loss": 0.3104, "step": 1500 }, { "epoch": 0.48, "learning_rate": 1.808291397076444e-05, "loss": 0.3241, "step": 2000 }, { "epoch": 0.6, "learning_rate": 1.7603642463455548e-05, "loss": 0.31, "step": 2500 }, { "epoch": 0.72, "learning_rate": 1.712437095614666e-05, "loss": 0.3088, "step": 3000 }, { "epoch": 0.84, "learning_rate": 1.6645099448837766e-05, "loss": 0.303, "step": 3500 }, { "epoch": 0.96, "learning_rate": 1.616582794152888e-05, "loss": 0.3046, "step": 4000 }, { "epoch": 1.0, "eval_accuracy": 0.8758252121974222, "eval_f1": 0.8723680200515267, "eval_kappa": 0.5282754100007345, "eval_loss": 0.28437086939811707, "eval_precision": 0.8700935888757716, "eval_recall": 0.8758252121974222, "eval_runtime": 101.4778, "eval_samples_per_second": 94.04, "step": 4173 }, { "epoch": 1.08, "learning_rate": 1.5686556434219985e-05, "loss": 0.2827, "step": 4500 }, { "epoch": 1.2, "learning_rate": 1.5207284926911097e-05, "loss": 0.2861, "step": 5000 }, { "epoch": 1.32, "learning_rate": 1.4728013419602205e-05, "loss": 0.3014, "step": 5500 }, { "epoch": 1.44, "learning_rate": 1.4248741912293316e-05, "loss": 0.2807, "step": 6000 }, { "epoch": 1.56, "learning_rate": 1.3769470404984425e-05, "loss": 0.2924, "step": 6500 }, { "epoch": 1.68, "learning_rate": 1.3290198897675534e-05, "loss": 0.3189, "step": 7000 }, { "epoch": 1.8, "learning_rate": 1.2810927390366643e-05, "loss": 0.2973, "step": 7500 }, { "epoch": 1.92, "learning_rate": 1.2331655883057754e-05, "loss": 0.266, "step": 8000 }, { "epoch": 2.0, "eval_accuracy": 0.8917531174683013, "eval_f1": 0.8844462847060539, "eval_kappa": 0.560786854721061, "eval_loss": 0.27383747696876526, "eval_precision": 0.8838724313266608, "eval_recall": 0.8917531174683013, "eval_runtime": 100.1991, "eval_samples_per_second": 95.24, "step": 8346 }, { "epoch": 2.04, "learning_rate": 1.1852384375748862e-05, "loss": 0.2906, "step": 8500 }, { "epoch": 2.16, "learning_rate": 1.1373112868439973e-05, "loss": 0.2792, "step": 9000 }, { "epoch": 2.28, "learning_rate": 1.0893841361131082e-05, "loss": 0.2777, "step": 9500 }, { "epoch": 2.4, "learning_rate": 1.0414569853822191e-05, "loss": 0.2786, "step": 10000 }, { "epoch": 2.52, "learning_rate": 9.9352983465133e-06, "loss": 0.2707, "step": 10500 }, { "epoch": 2.64, "learning_rate": 9.45602683920441e-06, "loss": 0.2628, "step": 11000 }, { "epoch": 2.76, "learning_rate": 8.976755331895519e-06, "loss": 0.2794, "step": 11500 }, { "epoch": 2.88, "learning_rate": 8.497483824586628e-06, "loss": 0.2591, "step": 12000 }, { "epoch": 3.0, "learning_rate": 8.018212317277739e-06, "loss": 0.2715, "step": 12500 }, { "epoch": 3.0, "eval_accuracy": 0.8908100178141046, "eval_f1": 0.8893163070910289, "eval_kappa": 0.5958280873418562, "eval_loss": 0.3180936276912689, "eval_precision": 0.8881406833580966, "eval_recall": 0.8908100178141046, "eval_runtime": 100.3133, "eval_samples_per_second": 95.132, "step": 12519 }, { "epoch": 3.12, "learning_rate": 7.538940809968847e-06, "loss": 0.2559, "step": 13000 }, { "epoch": 3.24, "learning_rate": 7.0596693026599574e-06, "loss": 0.2426, "step": 13500 }, { "epoch": 3.35, "learning_rate": 6.580397795351067e-06, "loss": 0.2494, "step": 14000 }, { "epoch": 3.47, "learning_rate": 6.101126288042176e-06, "loss": 0.2481, "step": 14500 }, { "epoch": 3.59, "learning_rate": 5.621854780733286e-06, "loss": 0.2438, "step": 15000 }, { "epoch": 3.71, "learning_rate": 5.142583273424395e-06, "loss": 0.2599, "step": 15500 }, { "epoch": 3.83, "learning_rate": 4.663311766115504e-06, "loss": 0.2401, "step": 16000 }, { "epoch": 3.95, "learning_rate": 4.1840402588066145e-06, "loss": 0.2553, "step": 16500 }, { "epoch": 4.0, "eval_accuracy": 0.8920674840197003, "eval_f1": 0.8862888705299119, "eval_kappa": 0.5716616874732978, "eval_loss": 0.37104833126068115, "eval_precision": 0.884886094916698, "eval_recall": 0.8920674840197003, "eval_runtime": 100.4865, "eval_samples_per_second": 94.968, "step": 16692 } ], "max_steps": 20865, "num_train_epochs": 5, "total_flos": 4.294687728722784e+16, "trial_name": null, "trial_params": null }