{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.906413485758574, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.9192662920622624e-05, "loss": 3.6238, "step": 500 }, { "epoch": 0.1, "learning_rate": 4.838532584124524e-05, "loss": 2.7692, "step": 1000 }, { "epoch": 0.15, "learning_rate": 4.757798876186786e-05, "loss": 2.5108, "step": 1500 }, { "epoch": 0.19, "learning_rate": 4.677065168249048e-05, "loss": 2.3409, "step": 2000 }, { "epoch": 0.24, "learning_rate": 4.596331460311309e-05, "loss": 2.2195, "step": 2500 }, { "epoch": 0.29, "learning_rate": 4.515597752373571e-05, "loss": 2.1371, "step": 3000 }, { "epoch": 0.34, "learning_rate": 4.434864044435833e-05, "loss": 2.057, "step": 3500 }, { "epoch": 0.39, "learning_rate": 4.354130336498095e-05, "loss": 1.9966, "step": 4000 }, { "epoch": 0.44, "learning_rate": 4.273396628560357e-05, "loss": 1.9476, "step": 4500 }, { "epoch": 0.48, "learning_rate": 4.192662920622619e-05, "loss": 1.8893, "step": 5000 }, { "epoch": 0.53, "learning_rate": 4.1119292126848804e-05, "loss": 1.8574, "step": 5500 }, { "epoch": 0.58, "learning_rate": 4.031195504747142e-05, "loss": 1.8144, "step": 6000 }, { "epoch": 0.63, "learning_rate": 3.950461796809404e-05, "loss": 1.7866, "step": 6500 }, { "epoch": 0.68, "learning_rate": 3.8697280888716657e-05, "loss": 1.7497, "step": 7000 }, { "epoch": 0.73, "learning_rate": 3.788994380933928e-05, "loss": 1.7287, "step": 7500 }, { "epoch": 0.78, "learning_rate": 3.7082606729961894e-05, "loss": 1.6999, "step": 8000 }, { "epoch": 0.82, "learning_rate": 3.6275269650584516e-05, "loss": 1.6749, "step": 8500 }, { "epoch": 0.87, "learning_rate": 3.546793257120713e-05, "loss": 1.6554, "step": 9000 }, { "epoch": 0.92, "learning_rate": 3.466059549182975e-05, "loss": 1.6307, "step": 9500 }, { "epoch": 0.97, "learning_rate": 3.385325841245237e-05, "loss": 1.6148, "step": 10000 }, { "epoch": 1.02, "learning_rate": 3.3045921333074984e-05, "loss": 1.5938, "step": 10500 }, { "epoch": 1.07, "learning_rate": 3.2238584253697606e-05, "loss": 1.5652, "step": 11000 }, { "epoch": 1.11, "learning_rate": 3.143124717432022e-05, "loss": 1.5597, "step": 11500 }, { "epoch": 1.16, "learning_rate": 3.062391009494284e-05, "loss": 1.5457, "step": 12000 }, { "epoch": 1.21, "learning_rate": 2.9816573015565458e-05, "loss": 1.5278, "step": 12500 }, { "epoch": 1.26, "learning_rate": 2.9009235936188077e-05, "loss": 1.5221, "step": 13000 }, { "epoch": 1.31, "learning_rate": 2.82018988568107e-05, "loss": 1.4991, "step": 13500 }, { "epoch": 1.36, "learning_rate": 2.7394561777433314e-05, "loss": 1.4876, "step": 14000 }, { "epoch": 1.4, "learning_rate": 2.6587224698055936e-05, "loss": 1.4747, "step": 14500 }, { "epoch": 1.45, "learning_rate": 2.577988761867855e-05, "loss": 1.4647, "step": 15000 }, { "epoch": 1.5, "learning_rate": 2.497255053930117e-05, "loss": 1.4566, "step": 15500 }, { "epoch": 1.55, "learning_rate": 2.416521345992379e-05, "loss": 1.4414, "step": 16000 }, { "epoch": 1.6, "learning_rate": 2.3357876380546407e-05, "loss": 1.4315, "step": 16500 }, { "epoch": 1.65, "learning_rate": 2.2550539301169023e-05, "loss": 1.4296, "step": 17000 }, { "epoch": 1.7, "learning_rate": 2.1743202221791645e-05, "loss": 1.4214, "step": 17500 }, { "epoch": 1.74, "learning_rate": 2.0935865142414263e-05, "loss": 1.4073, "step": 18000 }, { "epoch": 1.79, "learning_rate": 2.0128528063036882e-05, "loss": 1.4001, "step": 18500 }, { "epoch": 1.84, "learning_rate": 1.9321190983659497e-05, "loss": 1.4014, "step": 19000 }, { "epoch": 1.89, "learning_rate": 1.8513853904282116e-05, "loss": 1.3846, "step": 19500 }, { "epoch": 1.94, "learning_rate": 1.7706516824904734e-05, "loss": 1.3777, "step": 20000 }, { "epoch": 1.99, "learning_rate": 1.6899179745527353e-05, "loss": 1.3754, "step": 20500 }, { "epoch": 2.03, "learning_rate": 1.609184266614997e-05, "loss": 1.3593, "step": 21000 }, { "epoch": 2.08, "learning_rate": 1.528450558677259e-05, "loss": 1.356, "step": 21500 }, { "epoch": 2.13, "learning_rate": 1.4477168507395209e-05, "loss": 1.3527, "step": 22000 }, { "epoch": 2.18, "learning_rate": 1.3669831428017826e-05, "loss": 1.3366, "step": 22500 }, { "epoch": 2.23, "learning_rate": 1.2862494348640444e-05, "loss": 1.3392, "step": 23000 }, { "epoch": 2.28, "learning_rate": 1.2055157269263063e-05, "loss": 1.3282, "step": 23500 }, { "epoch": 2.33, "learning_rate": 1.1247820189885682e-05, "loss": 1.3309, "step": 24000 }, { "epoch": 2.37, "learning_rate": 1.0440483110508299e-05, "loss": 1.3235, "step": 24500 }, { "epoch": 2.42, "learning_rate": 9.633146031130919e-06, "loss": 1.3232, "step": 25000 }, { "epoch": 2.47, "learning_rate": 8.825808951753536e-06, "loss": 1.3155, "step": 25500 }, { "epoch": 2.52, "learning_rate": 8.018471872376155e-06, "loss": 1.309, "step": 26000 }, { "epoch": 2.57, "learning_rate": 7.211134792998773e-06, "loss": 1.3101, "step": 26500 }, { "epoch": 2.62, "learning_rate": 6.403797713621391e-06, "loss": 1.304, "step": 27000 }, { "epoch": 2.66, "learning_rate": 5.59646063424401e-06, "loss": 1.3035, "step": 27500 }, { "epoch": 2.71, "learning_rate": 4.7891235548666275e-06, "loss": 1.2952, "step": 28000 }, { "epoch": 2.76, "learning_rate": 3.981786475489246e-06, "loss": 1.2988, "step": 28500 }, { "epoch": 2.81, "learning_rate": 3.174449396111865e-06, "loss": 1.2876, "step": 29000 }, { "epoch": 2.86, "learning_rate": 2.3671123167344834e-06, "loss": 1.2922, "step": 29500 }, { "epoch": 2.91, "learning_rate": 1.5597752373571014e-06, "loss": 1.2881, "step": 30000 } ], "max_steps": 30966, "num_train_epochs": 3, "total_flos": 1.5832637769977856e+17, "trial_name": null, "trial_params": null }