{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.098039215686274, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.63, "eval_loss": 2.3024485111236572, "eval_runtime": 52.055, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.807, "step": 1 }, { "epoch": 1.88, "eval_loss": 2.2997043132781982, "eval_runtime": 52.1609, "eval_samples_per_second": 1.591, "eval_steps_per_second": 0.805, "step": 3 }, { "epoch": 2.51, "eval_loss": 2.2976605892181396, "eval_runtime": 52.0558, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.807, "step": 4 }, { "epoch": 3.14, "grad_norm": 0.0927734375, "learning_rate": 1.9238795325112867e-05, "loss": 2.2951, "step": 5 }, { "epoch": 3.76, "eval_loss": 2.293534278869629, "eval_runtime": 52.2245, "eval_samples_per_second": 1.589, "eval_steps_per_second": 0.804, "step": 6 }, { "epoch": 4.39, "eval_loss": 2.290515184402466, "eval_runtime": 52.068, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.807, "step": 7 }, { "epoch": 5.65, "eval_loss": 2.2852978706359863, "eval_runtime": 52.0381, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 9 }, { "epoch": 6.27, "grad_norm": 0.095703125, "learning_rate": 1.7071067811865477e-05, "loss": 2.2845, "step": 10 }, { "epoch": 6.9, "eval_loss": 2.2788708209991455, "eval_runtime": 52.4054, "eval_samples_per_second": 1.584, "eval_steps_per_second": 0.801, "step": 11 }, { "epoch": 7.53, "eval_loss": 2.275712251663208, "eval_runtime": 52.0276, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 12 }, { "epoch": 8.78, "eval_loss": 2.2695822715759277, "eval_runtime": 52.0264, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 14 }, { "epoch": 9.41, "grad_norm": 0.09375, "learning_rate": 1.3826834323650899e-05, "loss": 2.2729, "step": 15 }, { "epoch": 9.41, "eval_loss": 2.266735553741455, "eval_runtime": 52.0513, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 15 }, { "epoch": 10.67, "eval_loss": 2.2618672847747803, "eval_runtime": 52.0274, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 17 }, { "epoch": 11.92, "eval_loss": 2.2575833797454834, "eval_runtime": 52.0229, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 19 }, { "epoch": 12.55, "grad_norm": 0.08984375, "learning_rate": 1e-05, "loss": 2.2599, "step": 20 }, { "epoch": 12.55, "eval_loss": 2.2554843425750732, "eval_runtime": 52.0166, "eval_samples_per_second": 1.596, "eval_steps_per_second": 0.807, "step": 20 }, { "epoch": 13.8, "eval_loss": 2.2518961429595947, "eval_runtime": 52.0251, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 22 }, { "epoch": 14.43, "eval_loss": 2.250413179397583, "eval_runtime": 52.2375, "eval_samples_per_second": 1.589, "eval_steps_per_second": 0.804, "step": 23 }, { "epoch": 15.69, "grad_norm": 0.0869140625, "learning_rate": 6.173165676349103e-06, "loss": 2.2523, "step": 25 }, { "epoch": 15.69, "eval_loss": 2.2478551864624023, "eval_runtime": 52.034, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 25 }, { "epoch": 16.94, "eval_loss": 2.2462329864501953, "eval_runtime": 52.0568, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.807, "step": 27 }, { "epoch": 17.57, "eval_loss": 2.245408296585083, "eval_runtime": 52.0465, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 28 }, { "epoch": 18.82, "grad_norm": 0.087890625, "learning_rate": 2.9289321881345257e-06, "loss": 2.2471, "step": 30 }, { "epoch": 18.82, "eval_loss": 2.2442374229431152, "eval_runtime": 52.0648, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.807, "step": 30 }, { "epoch": 19.45, "eval_loss": 2.2436985969543457, "eval_runtime": 52.0526, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 31 }, { "epoch": 20.71, "eval_loss": 2.2432022094726562, "eval_runtime": 52.0653, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.807, "step": 33 }, { "epoch": 21.96, "grad_norm": 0.08349609375, "learning_rate": 7.612046748871327e-07, "loss": 2.2444, "step": 35 }, { "epoch": 21.96, "eval_loss": 2.24267315864563, "eval_runtime": 52.0344, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 35 }, { "epoch": 22.59, "eval_loss": 2.2428243160247803, "eval_runtime": 52.041, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 36 }, { "epoch": 23.84, "eval_loss": 2.2428712844848633, "eval_runtime": 52.0448, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 38 }, { "epoch": 24.47, "eval_loss": 2.2428550720214844, "eval_runtime": 52.0283, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.807, "step": 39 }, { "epoch": 25.1, "grad_norm": 0.08642578125, "learning_rate": 0.0, "loss": 2.2427, "step": 40 }, { "epoch": 25.1, "eval_loss": 2.2429935932159424, "eval_runtime": 52.018, "eval_samples_per_second": 1.596, "eval_steps_per_second": 0.807, "step": 40 }, { "epoch": 25.1, "step": 40, "total_flos": 5.866939585627423e+18, "train_loss": 2.2623767852783203, "train_runtime": 20272.0714, "train_samples_per_second": 0.805, "train_steps_per_second": 0.002 } ], "logging_steps": 5, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 5.866939585627423e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }