{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.653179190751445, "global_step": 17000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.58, "learning_rate": 1.15606936416185e-05, "loss": 4.1602, "step": 500 }, { "epoch": 1.0, "eval_gen_len": 15.5003, "eval_loss": 2.8024423122406006, "eval_rouge1": 0.2552, "eval_rouge2": 0.1019, "eval_rougeL": 0.2331, "eval_rougeLsum": 0.2332, "eval_runtime": 184.148, "eval_samples_per_second": 18.778, "eval_steps_per_second": 1.178, "step": 865 }, { "epoch": 1.16, "learning_rate": 1.9835716458777003e-05, "loss": 3.1173, "step": 1000 }, { "epoch": 1.73, "learning_rate": 1.922725889869182e-05, "loss": 2.9624, "step": 1500 }, { "epoch": 2.0, "eval_gen_len": 15.8271, "eval_loss": 2.6451988220214844, "eval_rouge1": 0.2882, "eval_rouge2": 0.1183, "eval_rougeL": 0.2626, "eval_rougeLsum": 0.2626, "eval_runtime": 182.4271, "eval_samples_per_second": 18.956, "eval_steps_per_second": 1.19, "step": 1730 }, { "epoch": 2.31, "learning_rate": 1.8618801338606633e-05, "loss": 2.8766, "step": 2000 }, { "epoch": 2.89, "learning_rate": 1.801034377852145e-05, "loss": 2.8206, "step": 2500 }, { "epoch": 3.0, "eval_gen_len": 16.2183, "eval_loss": 2.5787582397460938, "eval_rouge1": 0.2952, "eval_rouge2": 0.1218, "eval_rougeL": 0.2684, "eval_rougeLsum": 0.2685, "eval_runtime": 182.2993, "eval_samples_per_second": 18.969, "eval_steps_per_second": 1.19, "step": 2595 }, { "epoch": 3.47, "learning_rate": 1.7401886218436267e-05, "loss": 2.7764, "step": 3000 }, { "epoch": 4.0, "eval_gen_len": 16.3418, "eval_loss": 2.5332629680633545, "eval_rouge1": 0.3029, "eval_rouge2": 0.1258, "eval_rougeL": 0.2755, "eval_rougeLsum": 0.2754, "eval_runtime": 181.4496, "eval_samples_per_second": 19.058, "eval_steps_per_second": 1.196, "step": 3460 }, { "epoch": 4.05, "learning_rate": 1.679342865835108e-05, "loss": 2.7414, "step": 3500 }, { "epoch": 4.62, "learning_rate": 1.6184971098265897e-05, "loss": 2.6997, "step": 4000 }, { "epoch": 5.0, "eval_gen_len": 16.4101, "eval_loss": 2.504000186920166, "eval_rouge1": 0.3046, "eval_rouge2": 0.1273, "eval_rougeL": 0.2773, "eval_rougeLsum": 0.2771, "eval_runtime": 181.6751, "eval_samples_per_second": 19.034, "eval_steps_per_second": 1.194, "step": 4325 }, { "epoch": 5.2, "learning_rate": 1.5576513538180714e-05, "loss": 2.6795, "step": 4500 }, { "epoch": 5.78, "learning_rate": 1.4968055978095528e-05, "loss": 2.6647, "step": 5000 }, { "epoch": 6.0, "eval_gen_len": 16.354, "eval_loss": 2.4734175205230713, "eval_rouge1": 0.3069, "eval_rouge2": 0.1298, "eval_rougeL": 0.2802, "eval_rougeLsum": 0.2801, "eval_runtime": 181.0262, "eval_samples_per_second": 19.102, "eval_steps_per_second": 1.199, "step": 5190 }, { "epoch": 6.36, "learning_rate": 1.4359598418010346e-05, "loss": 2.6287, "step": 5500 }, { "epoch": 6.94, "learning_rate": 1.375114085792516e-05, "loss": 2.6196, "step": 6000 }, { "epoch": 7.0, "eval_gen_len": 16.526, "eval_loss": 2.4547228813171387, "eval_rouge1": 0.3077, "eval_rouge2": 0.131, "eval_rougeL": 0.2804, "eval_rougeLsum": 0.2804, "eval_runtime": 182.5951, "eval_samples_per_second": 18.938, "eval_steps_per_second": 1.188, "step": 6055 }, { "epoch": 7.51, "learning_rate": 1.3142683297839978e-05, "loss": 2.5916, "step": 6500 }, { "epoch": 8.0, "eval_gen_len": 16.4419, "eval_loss": 2.435168504714966, "eval_rouge1": 0.3107, "eval_rouge2": 0.1332, "eval_rougeL": 0.2833, "eval_rougeLsum": 0.2834, "eval_runtime": 181.1692, "eval_samples_per_second": 19.087, "eval_steps_per_second": 1.198, "step": 6920 }, { "epoch": 8.09, "learning_rate": 1.2534225737754793e-05, "loss": 2.5986, "step": 7000 }, { "epoch": 8.67, "learning_rate": 1.1925768177669607e-05, "loss": 2.5716, "step": 7500 }, { "epoch": 9.0, "eval_gen_len": 16.4023, "eval_loss": 2.4196717739105225, "eval_rouge1": 0.3092, "eval_rouge2": 0.132, "eval_rougeL": 0.2819, "eval_rougeLsum": 0.2817, "eval_runtime": 181.3331, "eval_samples_per_second": 19.07, "eval_steps_per_second": 1.197, "step": 7785 }, { "epoch": 9.25, "learning_rate": 1.1317310617584426e-05, "loss": 2.5347, "step": 8000 }, { "epoch": 9.83, "learning_rate": 1.070885305749924e-05, "loss": 2.548, "step": 8500 }, { "epoch": 10.0, "eval_gen_len": 16.3256, "eval_loss": 2.4123945236206055, "eval_rouge1": 0.3094, "eval_rouge2": 0.1329, "eval_rougeL": 0.2822, "eval_rougeLsum": 0.2823, "eval_runtime": 181.1565, "eval_samples_per_second": 19.088, "eval_steps_per_second": 1.198, "step": 8650 }, { "epoch": 10.4, "learning_rate": 1.0100395497414058e-05, "loss": 2.5237, "step": 9000 }, { "epoch": 10.98, "learning_rate": 9.491937937328873e-06, "loss": 2.5352, "step": 9500 }, { "epoch": 11.0, "eval_gen_len": 16.3774, "eval_loss": 2.402285575866699, "eval_rouge1": 0.3096, "eval_rouge2": 0.1323, "eval_rougeL": 0.2821, "eval_rougeLsum": 0.282, "eval_runtime": 181.4776, "eval_samples_per_second": 19.055, "eval_steps_per_second": 1.196, "step": 9515 }, { "epoch": 11.56, "learning_rate": 8.883480377243688e-06, "loss": 2.5087, "step": 10000 }, { "epoch": 12.0, "eval_gen_len": 16.4592, "eval_loss": 2.39676570892334, "eval_rouge1": 0.3107, "eval_rouge2": 0.1326, "eval_rougeL": 0.2831, "eval_rougeLsum": 0.2832, "eval_runtime": 182.2228, "eval_samples_per_second": 18.977, "eval_steps_per_second": 1.191, "step": 10380 }, { "epoch": 12.14, "learning_rate": 8.275022817158503e-06, "loss": 2.514, "step": 10500 }, { "epoch": 12.72, "learning_rate": 7.66656525707332e-06, "loss": 2.4865, "step": 11000 }, { "epoch": 13.0, "eval_gen_len": 16.4586, "eval_loss": 2.38985013961792, "eval_rouge1": 0.3117, "eval_rouge2": 0.1339, "eval_rougeL": 0.2841, "eval_rougeLsum": 0.2842, "eval_runtime": 181.3381, "eval_samples_per_second": 19.069, "eval_steps_per_second": 1.197, "step": 11245 }, { "epoch": 13.29, "learning_rate": 7.058107696988135e-06, "loss": 2.4861, "step": 11500 }, { "epoch": 13.87, "learning_rate": 6.449650136902952e-06, "loss": 2.5081, "step": 12000 }, { "epoch": 14.0, "eval_gen_len": 16.4072, "eval_loss": 2.3885083198547363, "eval_rouge1": 0.3124, "eval_rouge2": 0.1337, "eval_rougeL": 0.2845, "eval_rougeLsum": 0.2845, "eval_runtime": 181.2166, "eval_samples_per_second": 19.082, "eval_steps_per_second": 1.197, "step": 12110 }, { "epoch": 14.45, "learning_rate": 5.841192576817768e-06, "loss": 2.4748, "step": 12500 }, { "epoch": 15.0, "eval_gen_len": 16.4572, "eval_loss": 2.3791513442993164, "eval_rouge1": 0.3131, "eval_rouge2": 0.134, "eval_rougeL": 0.285, "eval_rougeLsum": 0.2849, "eval_runtime": 181.192, "eval_samples_per_second": 19.085, "eval_steps_per_second": 1.198, "step": 12975 }, { "epoch": 15.03, "learning_rate": 5.232735016732583e-06, "loss": 2.4641, "step": 13000 }, { "epoch": 15.61, "learning_rate": 4.6242774566473994e-06, "loss": 2.469, "step": 13500 }, { "epoch": 16.0, "eval_gen_len": 16.3907, "eval_loss": 2.3784728050231934, "eval_rouge1": 0.3119, "eval_rouge2": 0.1333, "eval_rougeL": 0.2841, "eval_rougeLsum": 0.284, "eval_runtime": 180.9918, "eval_samples_per_second": 19.106, "eval_steps_per_second": 1.199, "step": 13840 }, { "epoch": 16.18, "learning_rate": 4.0158198965622155e-06, "loss": 2.4771, "step": 14000 }, { "epoch": 16.76, "learning_rate": 3.4073623364770307e-06, "loss": 2.4587, "step": 14500 }, { "epoch": 17.0, "eval_gen_len": 16.4488, "eval_loss": 2.3765616416931152, "eval_rouge1": 0.3115, "eval_rouge2": 0.1334, "eval_rougeL": 0.2835, "eval_rougeLsum": 0.2834, "eval_runtime": 181.9602, "eval_samples_per_second": 19.004, "eval_steps_per_second": 1.193, "step": 14705 }, { "epoch": 17.34, "learning_rate": 2.798904776391847e-06, "loss": 2.4679, "step": 15000 }, { "epoch": 17.92, "learning_rate": 2.1904472163066628e-06, "loss": 2.4428, "step": 15500 }, { "epoch": 18.0, "eval_gen_len": 16.4141, "eval_loss": 2.375408411026001, "eval_rouge1": 0.3137, "eval_rouge2": 0.1346, "eval_rougeL": 0.2854, "eval_rougeLsum": 0.2854, "eval_runtime": 181.1095, "eval_samples_per_second": 19.093, "eval_steps_per_second": 1.198, "step": 15570 }, { "epoch": 18.5, "learning_rate": 1.5819896562214788e-06, "loss": 2.4548, "step": 16000 }, { "epoch": 19.0, "eval_gen_len": 16.4451, "eval_loss": 2.3725531101226807, "eval_rouge1": 0.3129, "eval_rouge2": 0.1341, "eval_rougeL": 0.2849, "eval_rougeLsum": 0.285, "eval_runtime": 180.98, "eval_samples_per_second": 19.107, "eval_steps_per_second": 1.199, "step": 16435 }, { "epoch": 19.08, "learning_rate": 9.735320961362946e-07, "loss": 2.4426, "step": 16500 }, { "epoch": 19.65, "learning_rate": 3.650745360511105e-07, "loss": 2.4389, "step": 17000 } ], "max_steps": 17300, "num_train_epochs": 20, "total_flos": 6.944051276532941e+16, "trial_name": null, "trial_params": null }