{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.653179190751445,
  "global_step": 17000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.58,
      "learning_rate": 1.15606936416185e-05,
      "loss": 4.1602,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 15.5003,
      "eval_loss": 2.8024423122406006,
      "eval_rouge1": 0.2552,
      "eval_rouge2": 0.1019,
      "eval_rougeL": 0.2331,
      "eval_rougeLsum": 0.2332,
      "eval_runtime": 184.148,
      "eval_samples_per_second": 18.778,
      "eval_steps_per_second": 1.178,
      "step": 865
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.9835716458777003e-05,
      "loss": 3.1173,
      "step": 1000
    },
    {
      "epoch": 1.73,
      "learning_rate": 1.922725889869182e-05,
      "loss": 2.9624,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 15.8271,
      "eval_loss": 2.6451988220214844,
      "eval_rouge1": 0.2882,
      "eval_rouge2": 0.1183,
      "eval_rougeL": 0.2626,
      "eval_rougeLsum": 0.2626,
      "eval_runtime": 182.4271,
      "eval_samples_per_second": 18.956,
      "eval_steps_per_second": 1.19,
      "step": 1730
    },
    {
      "epoch": 2.31,
      "learning_rate": 1.8618801338606633e-05,
      "loss": 2.8766,
      "step": 2000
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.801034377852145e-05,
      "loss": 2.8206,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 16.2183,
      "eval_loss": 2.5787582397460938,
      "eval_rouge1": 0.2952,
      "eval_rouge2": 0.1218,
      "eval_rougeL": 0.2684,
      "eval_rougeLsum": 0.2685,
      "eval_runtime": 182.2993,
      "eval_samples_per_second": 18.969,
      "eval_steps_per_second": 1.19,
      "step": 2595
    },
    {
      "epoch": 3.47,
      "learning_rate": 1.7401886218436267e-05,
      "loss": 2.7764,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 16.3418,
      "eval_loss": 2.5332629680633545,
      "eval_rouge1": 0.3029,
      "eval_rouge2": 0.1258,
      "eval_rougeL": 0.2755,
      "eval_rougeLsum": 0.2754,
      "eval_runtime": 181.4496,
      "eval_samples_per_second": 19.058,
      "eval_steps_per_second": 1.196,
      "step": 3460
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.679342865835108e-05,
      "loss": 2.7414,
      "step": 3500
    },
    {
      "epoch": 4.62,
      "learning_rate": 1.6184971098265897e-05,
      "loss": 2.6997,
      "step": 4000
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 16.4101,
      "eval_loss": 2.504000186920166,
      "eval_rouge1": 0.3046,
      "eval_rouge2": 0.1273,
      "eval_rougeL": 0.2773,
      "eval_rougeLsum": 0.2771,
      "eval_runtime": 181.6751,
      "eval_samples_per_second": 19.034,
      "eval_steps_per_second": 1.194,
      "step": 4325
    },
    {
      "epoch": 5.2,
      "learning_rate": 1.5576513538180714e-05,
      "loss": 2.6795,
      "step": 4500
    },
    {
      "epoch": 5.78,
      "learning_rate": 1.4968055978095528e-05,
      "loss": 2.6647,
      "step": 5000
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 16.354,
      "eval_loss": 2.4734175205230713,
      "eval_rouge1": 0.3069,
      "eval_rouge2": 0.1298,
      "eval_rougeL": 0.2802,
      "eval_rougeLsum": 0.2801,
      "eval_runtime": 181.0262,
      "eval_samples_per_second": 19.102,
      "eval_steps_per_second": 1.199,
      "step": 5190
    },
    {
      "epoch": 6.36,
      "learning_rate": 1.4359598418010346e-05,
      "loss": 2.6287,
      "step": 5500
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.375114085792516e-05,
      "loss": 2.6196,
      "step": 6000
    },
    {
      "epoch": 7.0,
      "eval_gen_len": 16.526,
      "eval_loss": 2.4547228813171387,
      "eval_rouge1": 0.3077,
      "eval_rouge2": 0.131,
      "eval_rougeL": 0.2804,
      "eval_rougeLsum": 0.2804,
      "eval_runtime": 182.5951,
      "eval_samples_per_second": 18.938,
      "eval_steps_per_second": 1.188,
      "step": 6055
    },
    {
      "epoch": 7.51,
      "learning_rate": 1.3142683297839978e-05,
      "loss": 2.5916,
      "step": 6500
    },
    {
      "epoch": 8.0,
      "eval_gen_len": 16.4419,
      "eval_loss": 2.435168504714966,
      "eval_rouge1": 0.3107,
      "eval_rouge2": 0.1332,
      "eval_rougeL": 0.2833,
      "eval_rougeLsum": 0.2834,
      "eval_runtime": 181.1692,
      "eval_samples_per_second": 19.087,
      "eval_steps_per_second": 1.198,
      "step": 6920
    },
    {
      "epoch": 8.09,
      "learning_rate": 1.2534225737754793e-05,
      "loss": 2.5986,
      "step": 7000
    },
    {
      "epoch": 8.67,
      "learning_rate": 1.1925768177669607e-05,
      "loss": 2.5716,
      "step": 7500
    },
    {
      "epoch": 9.0,
      "eval_gen_len": 16.4023,
      "eval_loss": 2.4196717739105225,
      "eval_rouge1": 0.3092,
      "eval_rouge2": 0.132,
      "eval_rougeL": 0.2819,
      "eval_rougeLsum": 0.2817,
      "eval_runtime": 181.3331,
      "eval_samples_per_second": 19.07,
      "eval_steps_per_second": 1.197,
      "step": 7785
    },
    {
      "epoch": 9.25,
      "learning_rate": 1.1317310617584426e-05,
      "loss": 2.5347,
      "step": 8000
    },
    {
      "epoch": 9.83,
      "learning_rate": 1.070885305749924e-05,
      "loss": 2.548,
      "step": 8500
    },
    {
      "epoch": 10.0,
      "eval_gen_len": 16.3256,
      "eval_loss": 2.4123945236206055,
      "eval_rouge1": 0.3094,
      "eval_rouge2": 0.1329,
      "eval_rougeL": 0.2822,
      "eval_rougeLsum": 0.2823,
      "eval_runtime": 181.1565,
      "eval_samples_per_second": 19.088,
      "eval_steps_per_second": 1.198,
      "step": 8650
    },
    {
      "epoch": 10.4,
      "learning_rate": 1.0100395497414058e-05,
      "loss": 2.5237,
      "step": 9000
    },
    {
      "epoch": 10.98,
      "learning_rate": 9.491937937328873e-06,
      "loss": 2.5352,
      "step": 9500
    },
    {
      "epoch": 11.0,
      "eval_gen_len": 16.3774,
      "eval_loss": 2.402285575866699,
      "eval_rouge1": 0.3096,
      "eval_rouge2": 0.1323,
      "eval_rougeL": 0.2821,
      "eval_rougeLsum": 0.282,
      "eval_runtime": 181.4776,
      "eval_samples_per_second": 19.055,
      "eval_steps_per_second": 1.196,
      "step": 9515
    },
    {
      "epoch": 11.56,
      "learning_rate": 8.883480377243688e-06,
      "loss": 2.5087,
      "step": 10000
    },
    {
      "epoch": 12.0,
      "eval_gen_len": 16.4592,
      "eval_loss": 2.39676570892334,
      "eval_rouge1": 0.3107,
      "eval_rouge2": 0.1326,
      "eval_rougeL": 0.2831,
      "eval_rougeLsum": 0.2832,
      "eval_runtime": 182.2228,
      "eval_samples_per_second": 18.977,
      "eval_steps_per_second": 1.191,
      "step": 10380
    },
    {
      "epoch": 12.14,
      "learning_rate": 8.275022817158503e-06,
      "loss": 2.514,
      "step": 10500
    },
    {
      "epoch": 12.72,
      "learning_rate": 7.66656525707332e-06,
      "loss": 2.4865,
      "step": 11000
    },
    {
      "epoch": 13.0,
      "eval_gen_len": 16.4586,
      "eval_loss": 2.38985013961792,
      "eval_rouge1": 0.3117,
      "eval_rouge2": 0.1339,
      "eval_rougeL": 0.2841,
      "eval_rougeLsum": 0.2842,
      "eval_runtime": 181.3381,
      "eval_samples_per_second": 19.069,
      "eval_steps_per_second": 1.197,
      "step": 11245
    },
    {
      "epoch": 13.29,
      "learning_rate": 7.058107696988135e-06,
      "loss": 2.4861,
      "step": 11500
    },
    {
      "epoch": 13.87,
      "learning_rate": 6.449650136902952e-06,
      "loss": 2.5081,
      "step": 12000
    },
    {
      "epoch": 14.0,
      "eval_gen_len": 16.4072,
      "eval_loss": 2.3885083198547363,
      "eval_rouge1": 0.3124,
      "eval_rouge2": 0.1337,
      "eval_rougeL": 0.2845,
      "eval_rougeLsum": 0.2845,
      "eval_runtime": 181.2166,
      "eval_samples_per_second": 19.082,
      "eval_steps_per_second": 1.197,
      "step": 12110
    },
    {
      "epoch": 14.45,
      "learning_rate": 5.841192576817768e-06,
      "loss": 2.4748,
      "step": 12500
    },
    {
      "epoch": 15.0,
      "eval_gen_len": 16.4572,
      "eval_loss": 2.3791513442993164,
      "eval_rouge1": 0.3131,
      "eval_rouge2": 0.134,
      "eval_rougeL": 0.285,
      "eval_rougeLsum": 0.2849,
      "eval_runtime": 181.192,
      "eval_samples_per_second": 19.085,
      "eval_steps_per_second": 1.198,
      "step": 12975
    },
    {
      "epoch": 15.03,
      "learning_rate": 5.232735016732583e-06,
      "loss": 2.4641,
      "step": 13000
    },
    {
      "epoch": 15.61,
      "learning_rate": 4.6242774566473994e-06,
      "loss": 2.469,
      "step": 13500
    },
    {
      "epoch": 16.0,
      "eval_gen_len": 16.3907,
      "eval_loss": 2.3784728050231934,
      "eval_rouge1": 0.3119,
      "eval_rouge2": 0.1333,
      "eval_rougeL": 0.2841,
      "eval_rougeLsum": 0.284,
      "eval_runtime": 180.9918,
      "eval_samples_per_second": 19.106,
      "eval_steps_per_second": 1.199,
      "step": 13840
    },
    {
      "epoch": 16.18,
      "learning_rate": 4.0158198965622155e-06,
      "loss": 2.4771,
      "step": 14000
    },
    {
      "epoch": 16.76,
      "learning_rate": 3.4073623364770307e-06,
      "loss": 2.4587,
      "step": 14500
    },
    {
      "epoch": 17.0,
      "eval_gen_len": 16.4488,
      "eval_loss": 2.3765616416931152,
      "eval_rouge1": 0.3115,
      "eval_rouge2": 0.1334,
      "eval_rougeL": 0.2835,
      "eval_rougeLsum": 0.2834,
      "eval_runtime": 181.9602,
      "eval_samples_per_second": 19.004,
      "eval_steps_per_second": 1.193,
      "step": 14705
    },
    {
      "epoch": 17.34,
      "learning_rate": 2.798904776391847e-06,
      "loss": 2.4679,
      "step": 15000
    },
    {
      "epoch": 17.92,
      "learning_rate": 2.1904472163066628e-06,
      "loss": 2.4428,
      "step": 15500
    },
    {
      "epoch": 18.0,
      "eval_gen_len": 16.4141,
      "eval_loss": 2.375408411026001,
      "eval_rouge1": 0.3137,
      "eval_rouge2": 0.1346,
      "eval_rougeL": 0.2854,
      "eval_rougeLsum": 0.2854,
      "eval_runtime": 181.1095,
      "eval_samples_per_second": 19.093,
      "eval_steps_per_second": 1.198,
      "step": 15570
    },
    {
      "epoch": 18.5,
      "learning_rate": 1.5819896562214788e-06,
      "loss": 2.4548,
      "step": 16000
    },
    {
      "epoch": 19.0,
      "eval_gen_len": 16.4451,
      "eval_loss": 2.3725531101226807,
      "eval_rouge1": 0.3129,
      "eval_rouge2": 0.1341,
      "eval_rougeL": 0.2849,
      "eval_rougeLsum": 0.285,
      "eval_runtime": 180.98,
      "eval_samples_per_second": 19.107,
      "eval_steps_per_second": 1.199,
      "step": 16435
    },
    {
      "epoch": 19.08,
      "learning_rate": 9.735320961362946e-07,
      "loss": 2.4426,
      "step": 16500
    },
    {
      "epoch": 19.65,
      "learning_rate": 3.650745360511105e-07,
      "loss": 2.4389,
      "step": 17000
    }
  ],
  "max_steps": 17300,
  "num_train_epochs": 20,
  "total_flos": 6.944051276532941e+16,
  "trial_name": null,
  "trial_params": null
}