{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.612244897959183, "eval_steps": 500, "global_step": 225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9999619230641714e-05, "loss": 1.4178, "step": 1 }, { "epoch": 0.2, "learning_rate": 1.999048221581858e-05, "loss": 1.1959, "step": 5 }, { "epoch": 0.41, "learning_rate": 1.9961946980917457e-05, "loss": 1.0923, "step": 10 }, { "epoch": 0.61, "learning_rate": 1.9914448613738107e-05, "loss": 1.0543, "step": 15 }, { "epoch": 0.61, "eval_loss": 1.0328614711761475, "eval_runtime": 380.3901, "eval_samples_per_second": 61.369, "eval_steps_per_second": 0.481, "step": 15 }, { "epoch": 1.2, "learning_rate": 1.9848077530122083e-05, "loss": 0.979, "step": 20 }, { "epoch": 1.41, "learning_rate": 1.9762960071199334e-05, "loss": 0.9747, "step": 25 }, { "epoch": 1.61, "learning_rate": 1.9659258262890683e-05, "loss": 0.9704, "step": 30 }, { "epoch": 1.61, "eval_loss": 1.0141010284423828, "eval_runtime": 379.2417, "eval_samples_per_second": 61.554, "eval_steps_per_second": 0.483, "step": 30 }, { "epoch": 2.2, "learning_rate": 1.953716950748227e-05, "loss": 0.9064, "step": 35 }, { "epoch": 2.41, "learning_rate": 1.9396926207859085e-05, "loss": 0.9211, "step": 40 }, { "epoch": 2.61, "learning_rate": 1.9238795325112867e-05, "loss": 0.9103, "step": 45 }, { "epoch": 2.61, "eval_loss": 1.0125378370285034, "eval_runtime": 378.9132, "eval_samples_per_second": 61.608, "eval_steps_per_second": 0.483, "step": 45 }, { "epoch": 3.2, "learning_rate": 1.9063077870366504e-05, "loss": 0.8537, "step": 50 }, { "epoch": 3.41, "learning_rate": 1.887010833178222e-05, "loss": 0.8633, "step": 55 }, { "epoch": 3.61, "learning_rate": 1.866025403784439e-05, "loss": 0.8485, "step": 60 }, { "epoch": 3.61, "eval_loss": 1.0221478939056396, "eval_runtime": 379.6533, "eval_samples_per_second": 61.488, "eval_steps_per_second": 0.482, "step": 60 }, { "epoch": 4.2, "learning_rate": 1.843391445812886e-05, "loss": 0.7992, "step": 65 }, { "epoch": 4.41, "learning_rate": 1.819152044288992e-05, "loss": 0.806, "step": 70 }, { "epoch": 4.61, "learning_rate": 1.7933533402912354e-05, "loss": 0.785, "step": 75 }, { "epoch": 4.61, "eval_loss": 1.0448094606399536, "eval_runtime": 379.8413, "eval_samples_per_second": 61.457, "eval_steps_per_second": 0.482, "step": 75 }, { "epoch": 5.2, "learning_rate": 1.766044443118978e-05, "loss": 0.7402, "step": 80 }, { "epoch": 5.41, "learning_rate": 1.737277336810124e-05, "loss": 0.7403, "step": 85 }, { "epoch": 5.61, "learning_rate": 1.7071067811865477e-05, "loss": 0.7207, "step": 90 }, { "epoch": 5.61, "eval_loss": 1.0821477174758911, "eval_runtime": 379.8936, "eval_samples_per_second": 61.449, "eval_steps_per_second": 0.482, "step": 90 }, { "epoch": 6.2, "learning_rate": 1.6755902076156606e-05, "loss": 0.675, "step": 95 }, { "epoch": 6.41, "learning_rate": 1.6427876096865394e-05, "loss": 0.6715, "step": 100 }, { "epoch": 6.61, "learning_rate": 1.608761429008721e-05, "loss": 0.6444, "step": 105 }, { "epoch": 6.61, "eval_loss": 1.1343796253204346, "eval_runtime": 379.781, "eval_samples_per_second": 61.467, "eval_steps_per_second": 0.482, "step": 105 }, { "epoch": 7.2, "learning_rate": 1.573576436351046e-05, "loss": 0.6015, "step": 110 }, { "epoch": 7.41, "learning_rate": 1.5372996083468242e-05, "loss": 0.5976, "step": 115 }, { "epoch": 7.61, "learning_rate": 1.5000000000000002e-05, "loss": 0.5673, "step": 120 }, { "epoch": 7.61, "eval_loss": 1.199308156967163, "eval_runtime": 379.6907, "eval_samples_per_second": 61.482, "eval_steps_per_second": 0.482, "step": 120 }, { "epoch": 8.2, "learning_rate": 1.4617486132350343e-05, "loss": 0.5246, "step": 125 }, { "epoch": 8.41, "learning_rate": 1.4226182617406996e-05, "loss": 0.5178, "step": 130 }, { "epoch": 8.61, "learning_rate": 1.3826834323650899e-05, "loss": 0.4883, "step": 135 }, { "epoch": 8.61, "eval_loss": 1.280012845993042, "eval_runtime": 380.0105, "eval_samples_per_second": 61.43, "eval_steps_per_second": 0.482, "step": 135 }, { "epoch": 9.2, "learning_rate": 1.342020143325669e-05, "loss": 0.4505, "step": 140 }, { "epoch": 9.41, "learning_rate": 1.300705799504273e-05, "loss": 0.439, "step": 145 }, { "epoch": 9.61, "learning_rate": 1.2588190451025209e-05, "loss": 0.4137, "step": 150 }, { "epoch": 9.61, "eval_loss": 1.3778117895126343, "eval_runtime": 380.11, "eval_samples_per_second": 61.414, "eval_steps_per_second": 0.481, "step": 150 }, { "epoch": 10.2, "learning_rate": 1.2164396139381029e-05, "loss": 0.3819, "step": 155 }, { "epoch": 10.41, "learning_rate": 1.1736481776669307e-05, "loss": 0.3717, "step": 160 }, { "epoch": 10.61, "learning_rate": 1.130526192220052e-05, "loss": 0.345, "step": 165 }, { "epoch": 10.61, "eval_loss": 1.4091572761535645, "eval_runtime": 380.7706, "eval_samples_per_second": 61.307, "eval_steps_per_second": 0.481, "step": 165 }, { "epoch": 11.2, "learning_rate": 1.0871557427476585e-05, "loss": 0.3263, "step": 170 }, { "epoch": 11.41, "learning_rate": 1.0436193873653362e-05, "loss": 0.3269, "step": 175 }, { "epoch": 11.61, "learning_rate": 1e-05, "loss": 0.3022, "step": 180 }, { "epoch": 11.61, "eval_loss": 1.5370900630950928, "eval_runtime": 382.2626, "eval_samples_per_second": 61.068, "eval_steps_per_second": 0.479, "step": 180 }, { "epoch": 12.2, "learning_rate": 9.563806126346643e-06, "loss": 0.2824, "step": 185 }, { "epoch": 12.41, "learning_rate": 9.128442572523418e-06, "loss": 0.2818, "step": 190 }, { "epoch": 12.61, "learning_rate": 8.694738077799487e-06, "loss": 0.2649, "step": 195 }, { "epoch": 12.61, "eval_loss": 1.5054309368133545, "eval_runtime": 382.7085, "eval_samples_per_second": 60.997, "eval_steps_per_second": 0.478, "step": 195 }, { "epoch": 13.2, "learning_rate": 8.263518223330698e-06, "loss": 0.2523, "step": 200 }, { "epoch": 13.41, "learning_rate": 7.835603860618973e-06, "loss": 0.2488, "step": 205 }, { "epoch": 13.61, "learning_rate": 7.411809548974792e-06, "loss": 0.2272, "step": 210 }, { "epoch": 13.61, "eval_loss": 1.554245948791504, "eval_runtime": 380.4741, "eval_samples_per_second": 61.355, "eval_steps_per_second": 0.481, "step": 210 }, { "epoch": 14.2, "learning_rate": 6.992942004957271e-06, "loss": 0.2172, "step": 215 }, { "epoch": 14.41, "learning_rate": 6.579798566743314e-06, "loss": 0.213, "step": 220 }, { "epoch": 14.61, "learning_rate": 6.173165676349103e-06, "loss": 0.1929, "step": 225 }, { "epoch": 14.61, "eval_loss": 1.6868618726730347, "eval_runtime": 382.7551, "eval_samples_per_second": 60.989, "eval_steps_per_second": 0.478, "step": 225 }, { "epoch": 14.61, "step": 225, "total_flos": 367408950804480.0, "train_loss": 0.6006989640659756, "train_runtime": 15355.4719, "train_samples_per_second": 12.201, "train_steps_per_second": 0.023 } ], "logging_steps": 5, "max_steps": 360, "num_train_epochs": 15, "save_steps": 500, "total_flos": 367408950804480.0, "trial_name": null, "trial_params": null }