indosum-seq_bn-rf64-0 / trainer_state.json
apwic's picture
End of training
ede4455 verified
{
"best_metric": 72.0617,
"best_model_checkpoint": "bin/indosum-seq_bn-rf64-0/checkpoint-4460",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 4460,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.8590821623802185,
"learning_rate": 0.0008,
"loss": 0.8386,
"step": 892
},
{
"epoch": 1.0,
"eval_gen_len": 102.556,
"eval_loss": 0.5658084750175476,
"eval_rouge1": 68.0586,
"eval_rouge2": 60.6185,
"eval_rougeL": 65.0879,
"eval_rougeLsum": 67.0846,
"eval_runtime": 199.9063,
"eval_samples_per_second": 3.752,
"eval_steps_per_second": 0.12,
"step": 892
},
{
"epoch": 2.0,
"grad_norm": 1.242629885673523,
"learning_rate": 0.0006,
"loss": 0.646,
"step": 1784
},
{
"epoch": 2.0,
"eval_gen_len": 107.088,
"eval_loss": 0.5345786213874817,
"eval_rouge1": 69.6096,
"eval_rouge2": 62.3885,
"eval_rougeL": 66.6327,
"eval_rougeLsum": 68.7343,
"eval_runtime": 199.0639,
"eval_samples_per_second": 3.768,
"eval_steps_per_second": 0.121,
"step": 1784
},
{
"epoch": 3.0,
"grad_norm": 1.6137309074401855,
"learning_rate": 0.0004,
"loss": 0.6031,
"step": 2676
},
{
"epoch": 3.0,
"eval_gen_len": 101.66933333333333,
"eval_loss": 0.5019331574440002,
"eval_rouge1": 70.498,
"eval_rouge2": 63.0668,
"eval_rougeL": 67.3204,
"eval_rougeLsum": 69.5075,
"eval_runtime": 196.9138,
"eval_samples_per_second": 3.809,
"eval_steps_per_second": 0.122,
"step": 2676
},
{
"epoch": 4.0,
"grad_norm": 1.2560288906097412,
"learning_rate": 0.0002,
"loss": 0.5753,
"step": 3568
},
{
"epoch": 4.0,
"eval_gen_len": 104.52,
"eval_loss": 0.5093210339546204,
"eval_rouge1": 71.6759,
"eval_rouge2": 64.4776,
"eval_rougeL": 68.7095,
"eval_rougeLsum": 70.7692,
"eval_runtime": 202.9171,
"eval_samples_per_second": 3.696,
"eval_steps_per_second": 0.118,
"step": 3568
},
{
"epoch": 5.0,
"grad_norm": 1.0930343866348267,
"learning_rate": 0.0,
"loss": 0.5551,
"step": 4460
},
{
"epoch": 5.0,
"eval_gen_len": 102.57333333333334,
"eval_loss": 0.5045868754386902,
"eval_rouge1": 72.0617,
"eval_rouge2": 64.9137,
"eval_rougeL": 69.0991,
"eval_rougeLsum": 71.1205,
"eval_runtime": 199.9624,
"eval_samples_per_second": 3.751,
"eval_steps_per_second": 0.12,
"step": 4460
},
{
"epoch": 5.0,
"step": 4460,
"total_flos": 4.893105473224704e+16,
"train_loss": 0.6436540338490576,
"train_runtime": 4577.7815,
"train_samples_per_second": 15.577,
"train_steps_per_second": 0.974
}
],
"logging_steps": 500,
"max_steps": 4460,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 4.893105473224704e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}