|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8416206261510129, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.6930632289748316e-05, |
|
"loss": 2.4961, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 1.5879, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.6547783613204956, |
|
"eval_rouge1": 25.4717, |
|
"eval_rouge2": 5.11, |
|
"eval_rougeL": 24.6679, |
|
"eval_rougeLsum": 24.6696, |
|
"eval_runtime": 425.1194, |
|
"eval_samples_per_second": 1.169, |
|
"eval_steps_per_second": 0.294, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.386126457949662e-05, |
|
"loss": 1.6561, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 2.3515, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.5339239835739136, |
|
"eval_rouge1": 26.1748, |
|
"eval_rouge2": 5.9106, |
|
"eval_rougeL": 25.413, |
|
"eval_rougeLsum": 25.3958, |
|
"eval_runtime": 422.3316, |
|
"eval_samples_per_second": 1.177, |
|
"eval_steps_per_second": 0.296, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.079189686924494e-05, |
|
"loss": 1.5772, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_bleu": 2.266, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.4510468244552612, |
|
"eval_rouge1": 28.6891, |
|
"eval_rouge2": 6.0431, |
|
"eval_rougeL": 27.7387, |
|
"eval_rougeLsum": 27.8043, |
|
"eval_runtime": 433.608, |
|
"eval_samples_per_second": 1.146, |
|
"eval_steps_per_second": 0.288, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.772252915899325e-05, |
|
"loss": 1.492, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 3.6517, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.3759902715682983, |
|
"eval_rouge1": 29.0257, |
|
"eval_rouge2": 7.8515, |
|
"eval_rougeL": 28.3142, |
|
"eval_rougeLsum": 28.3036, |
|
"eval_runtime": 430.0719, |
|
"eval_samples_per_second": 1.156, |
|
"eval_steps_per_second": 0.291, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4653161448741564e-05, |
|
"loss": 1.4736, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_bleu": 3.4866, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.3425214290618896, |
|
"eval_rouge1": 27.9774, |
|
"eval_rouge2": 6.2175, |
|
"eval_rougeL": 26.7783, |
|
"eval_rougeLsum": 26.7207, |
|
"eval_runtime": 428.7998, |
|
"eval_samples_per_second": 1.159, |
|
"eval_steps_per_second": 0.292, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.158379373848988e-05, |
|
"loss": 1.3856, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_bleu": 3.1649, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.311830997467041, |
|
"eval_rouge1": 27.3532, |
|
"eval_rouge2": 6.5569, |
|
"eval_rougeL": 26.4964, |
|
"eval_rougeLsum": 26.5087, |
|
"eval_runtime": 430.7347, |
|
"eval_samples_per_second": 1.154, |
|
"eval_steps_per_second": 0.29, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8514426028238185e-05, |
|
"loss": 1.3972, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_bleu": 3.5337, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.2867928743362427, |
|
"eval_rouge1": 28.233, |
|
"eval_rouge2": 7.6471, |
|
"eval_rougeL": 27.3651, |
|
"eval_rougeLsum": 27.3354, |
|
"eval_runtime": 428.0559, |
|
"eval_samples_per_second": 1.161, |
|
"eval_steps_per_second": 0.292, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.54450583179865e-05, |
|
"loss": 1.374, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_bleu": 3.5737, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.2571070194244385, |
|
"eval_rouge1": 28.8216, |
|
"eval_rouge2": 7.542, |
|
"eval_rougeL": 27.9166, |
|
"eval_rougeLsum": 27.9353, |
|
"eval_runtime": 432.0511, |
|
"eval_samples_per_second": 1.15, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.237569060773481e-05, |
|
"loss": 1.2207, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_bleu": 3.7983, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.3362118005752563, |
|
"eval_rouge1": 29.9574, |
|
"eval_rouge2": 8.1088, |
|
"eval_rougeL": 28.8866, |
|
"eval_rougeLsum": 28.855, |
|
"eval_runtime": 447.7731, |
|
"eval_samples_per_second": 1.11, |
|
"eval_steps_per_second": 0.141, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.930632289748312e-05, |
|
"loss": 1.1861, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_bleu": 3.6521, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.3295202255249023, |
|
"eval_rouge1": 30.072, |
|
"eval_rouge2": 7.7799, |
|
"eval_rougeL": 28.8417, |
|
"eval_rougeLsum": 28.864, |
|
"eval_runtime": 427.6198, |
|
"eval_samples_per_second": 1.162, |
|
"eval_steps_per_second": 0.147, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.623695518723143e-05, |
|
"loss": 1.1173, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_bleu": 3.9784, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.333518147468567, |
|
"eval_rouge1": 29.736, |
|
"eval_rouge2": 7.9661, |
|
"eval_rougeL": 28.6877, |
|
"eval_rougeLsum": 28.6974, |
|
"eval_runtime": 425.771, |
|
"eval_samples_per_second": 1.167, |
|
"eval_steps_per_second": 0.148, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.3167587476979742e-05, |
|
"loss": 1.1255, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_bleu": 4.3021, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.309721827507019, |
|
"eval_rouge1": 29.8176, |
|
"eval_rouge2": 8.4656, |
|
"eval_rougeL": 28.958, |
|
"eval_rougeLsum": 28.9571, |
|
"eval_runtime": 428.4498, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 0.147, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.0098219766728055e-05, |
|
"loss": 1.0909, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_bleu": 4.4782, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.3094563484191895, |
|
"eval_rouge1": 30.0233, |
|
"eval_rouge2": 8.4896, |
|
"eval_rougeL": 29.2562, |
|
"eval_rougeLsum": 29.2375, |
|
"eval_runtime": 429.1068, |
|
"eval_samples_per_second": 1.158, |
|
"eval_steps_per_second": 0.147, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.0288520564763666e-06, |
|
"loss": 1.1205, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_bleu": 4.44, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.2991728782653809, |
|
"eval_rouge1": 29.7164, |
|
"eval_rouge2": 8.007, |
|
"eval_rougeL": 28.5027, |
|
"eval_rougeLsum": 28.5018, |
|
"eval_runtime": 423.565, |
|
"eval_samples_per_second": 1.173, |
|
"eval_steps_per_second": 0.149, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.959484346224679e-06, |
|
"loss": 1.1069, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_bleu": 4.6065, |
|
"eval_gen_len": 9.533199195171026, |
|
"eval_loss": 1.283018708229065, |
|
"eval_rouge1": 29.851, |
|
"eval_rouge2": 8.4312, |
|
"eval_rougeL": 28.8139, |
|
"eval_rougeLsum": 28.8205, |
|
"eval_runtime": 425.8103, |
|
"eval_samples_per_second": 1.167, |
|
"eval_steps_per_second": 0.148, |
|
"step": 3000 |
|
} |
|
], |
|
"max_steps": 3258, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.0430752431509504e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|