liputan6-lora-8 / trainer_state.json
apwic's picture
End of training
31411b7 verified
raw
history blame
3.21 kB
{
"best_metric": 28.8883,
"best_model_checkpoint": "bin/liputan6-lora-8/checkpoint-252",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.1928575038909912,
"learning_rate": 0.0008,
"loss": 3.1146,
"step": 63
},
{
"epoch": 1.0,
"eval_gen_len": 28.445,
"eval_loss": 2.77751088142395,
"eval_rouge1": 25.1926,
"eval_rouge2": 10.831,
"eval_rougeL": 21.8588,
"eval_rougeLsum": 23.0603,
"eval_runtime": 282.5418,
"eval_samples_per_second": 3.539,
"eval_steps_per_second": 0.113,
"step": 63
},
{
"epoch": 2.0,
"grad_norm": 1.15657377243042,
"learning_rate": 0.0006,
"loss": 2.3694,
"step": 126
},
{
"epoch": 2.0,
"eval_gen_len": 35.297,
"eval_loss": 2.6682474613189697,
"eval_rouge1": 27.3169,
"eval_rouge2": 12.0479,
"eval_rougeL": 23.5983,
"eval_rougeLsum": 25.0305,
"eval_runtime": 263.6368,
"eval_samples_per_second": 3.793,
"eval_steps_per_second": 0.121,
"step": 126
},
{
"epoch": 3.0,
"grad_norm": 1.2961732149124146,
"learning_rate": 0.0004,
"loss": 2.2462,
"step": 189
},
{
"epoch": 3.0,
"eval_gen_len": 31.547,
"eval_loss": 2.610564708709717,
"eval_rouge1": 27.8939,
"eval_rouge2": 12.0631,
"eval_rougeL": 23.6765,
"eval_rougeLsum": 25.5954,
"eval_runtime": 173.0596,
"eval_samples_per_second": 5.778,
"eval_steps_per_second": 0.185,
"step": 189
},
{
"epoch": 4.0,
"grad_norm": 1.227543830871582,
"learning_rate": 0.0002,
"loss": 2.1582,
"step": 252
},
{
"epoch": 4.0,
"eval_gen_len": 31.688,
"eval_loss": 2.6337075233459473,
"eval_rouge1": 28.8883,
"eval_rouge2": 12.7056,
"eval_rougeL": 24.6102,
"eval_rougeLsum": 26.4945,
"eval_runtime": 153.1276,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 0.209,
"step": 252
},
{
"epoch": 5.0,
"grad_norm": 1.2467632293701172,
"learning_rate": 0.0,
"loss": 2.1188,
"step": 315
},
{
"epoch": 5.0,
"eval_gen_len": 32.655,
"eval_loss": 2.638486385345459,
"eval_rouge1": 28.3508,
"eval_rouge2": 12.5031,
"eval_rougeL": 24.1992,
"eval_rougeLsum": 26.092,
"eval_runtime": 177.4741,
"eval_samples_per_second": 5.635,
"eval_steps_per_second": 0.18,
"step": 315
},
{
"epoch": 5.0,
"step": 315,
"total_flos": 3437376307200000.0,
"train_loss": 2.4014356340680805,
"train_runtime": 1409.9107,
"train_samples_per_second": 3.546,
"train_steps_per_second": 0.223
}
],
"logging_steps": 500,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 3437376307200000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}