{ "best_metric": 0.19139504432678223, "best_model_checkpoint": "pri_docidv2/checkpoint-200", "epoch": 10.0, "eval_steps": 100, "global_step": 310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "learning_rate": 0.00029032258064516127, "loss": 0.1105, "step": 10 }, { "epoch": 0.65, "learning_rate": 0.00028064516129032256, "loss": 0.1204, "step": 20 }, { "epoch": 0.97, "learning_rate": 0.00027096774193548386, "loss": 0.0642, "step": 30 }, { "epoch": 1.29, "learning_rate": 0.00026129032258064515, "loss": 0.0199, "step": 40 }, { "epoch": 1.61, "learning_rate": 0.00025161290322580645, "loss": 0.1239, "step": 50 }, { "epoch": 1.94, "learning_rate": 0.00024193548387096771, "loss": 0.0675, "step": 60 }, { "epoch": 2.26, "learning_rate": 0.000232258064516129, "loss": 0.0115, "step": 70 }, { "epoch": 2.58, "learning_rate": 0.0002225806451612903, "loss": 0.095, "step": 80 }, { "epoch": 2.9, "learning_rate": 0.0002129032258064516, "loss": 0.1924, "step": 90 }, { "epoch": 3.23, "learning_rate": 0.00020322580645161287, "loss": 0.0366, "step": 100 }, { "epoch": 3.23, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.3684941828250885, "eval_runtime": 2.4287, "eval_samples_per_second": 86.467, "eval_steps_per_second": 11.117, "step": 100 }, { "epoch": 3.55, "learning_rate": 0.00019354838709677416, "loss": 0.0101, "step": 110 }, { "epoch": 3.87, "learning_rate": 0.00018387096774193548, "loss": 0.0275, "step": 120 }, { "epoch": 4.19, "learning_rate": 0.00017419354838709678, "loss": 0.0815, "step": 130 }, { "epoch": 4.52, "learning_rate": 0.00016451612903225804, "loss": 0.0098, "step": 140 }, { "epoch": 4.84, "learning_rate": 0.00015483870967741934, "loss": 0.0051, "step": 150 }, { "epoch": 5.16, "learning_rate": 0.00014516129032258063, "loss": 0.1023, "step": 160 }, { "epoch": 5.48, "learning_rate": 0.00013548387096774193, "loss": 0.0874, "step": 170 }, { "epoch": 5.81, "learning_rate": 0.00012580645161290322, "loss": 0.0113, "step": 180 }, { "epoch": 6.13, "learning_rate": 0.0001161290322580645, "loss": 0.0469, "step": 190 }, { "epoch": 6.45, "learning_rate": 0.0001064516129032258, "loss": 0.006, "step": 200 }, { "epoch": 6.45, "eval_accuracy": 0.9571428571428572, "eval_loss": 0.19139504432678223, "eval_runtime": 2.4676, "eval_samples_per_second": 85.101, "eval_steps_per_second": 10.942, "step": 200 }, { "epoch": 6.77, "learning_rate": 9.677419354838708e-05, "loss": 0.0467, "step": 210 }, { "epoch": 7.1, "learning_rate": 8.709677419354839e-05, "loss": 0.0042, "step": 220 }, { "epoch": 7.42, "learning_rate": 7.741935483870967e-05, "loss": 0.0036, "step": 230 }, { "epoch": 7.74, "learning_rate": 6.774193548387096e-05, "loss": 0.0026, "step": 240 }, { "epoch": 8.06, "learning_rate": 5.806451612903225e-05, "loss": 0.0032, "step": 250 }, { "epoch": 8.39, "learning_rate": 4.838709677419354e-05, "loss": 0.0028, "step": 260 }, { "epoch": 8.71, "learning_rate": 3.8709677419354835e-05, "loss": 0.0027, "step": 270 }, { "epoch": 9.03, "learning_rate": 2.9032258064516126e-05, "loss": 0.0028, "step": 280 }, { "epoch": 9.35, "learning_rate": 1.9354838709677417e-05, "loss": 0.002, "step": 290 }, { "epoch": 9.68, "learning_rate": 9.677419354838709e-06, "loss": 0.0031, "step": 300 }, { "epoch": 9.68, "eval_accuracy": 0.9571428571428572, "eval_loss": 0.21088813245296478, "eval_runtime": 2.491, "eval_samples_per_second": 84.305, "eval_steps_per_second": 10.839, "step": 300 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.003, "step": 310 }, { "epoch": 10.0, "step": 310, "total_flos": 3.7817107732905984e+17, "train_loss": 0.04214024447625683, "train_runtime": 135.0852, "train_samples_per_second": 36.125, "train_steps_per_second": 2.295 } ], "logging_steps": 10, "max_steps": 310, "num_train_epochs": 10, "save_steps": 100, "total_flos": 3.7817107732905984e+17, "trial_name": null, "trial_params": null }