byt5-base-es_kbh / trainer_state.json
mekjr1's picture
End of training
c24b3d7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 3950,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.0232,
"eval_gen_len": 19.0,
"eval_loss": 1.0572127103805542,
"eval_runtime": 39.011,
"eval_samples_per_second": 20.251,
"eval_steps_per_second": 1.282,
"step": 395
},
{
"epoch": 1.27,
"learning_rate": 4.367088607594937e-05,
"loss": 1.5347,
"step": 500
},
{
"epoch": 2.0,
"eval_bleu": 0.1035,
"eval_gen_len": 19.0,
"eval_loss": 0.9248576760292053,
"eval_runtime": 39.2027,
"eval_samples_per_second": 20.152,
"eval_steps_per_second": 1.275,
"step": 790
},
{
"epoch": 2.53,
"learning_rate": 3.7341772151898736e-05,
"loss": 1.0539,
"step": 1000
},
{
"epoch": 3.0,
"eval_bleu": 0.1313,
"eval_gen_len": 19.0,
"eval_loss": 0.854935884475708,
"eval_runtime": 39.1519,
"eval_samples_per_second": 20.178,
"eval_steps_per_second": 1.277,
"step": 1185
},
{
"epoch": 3.8,
"learning_rate": 3.10126582278481e-05,
"loss": 0.9461,
"step": 1500
},
{
"epoch": 4.0,
"eval_bleu": 0.1392,
"eval_gen_len": 19.0,
"eval_loss": 0.8109087347984314,
"eval_runtime": 39.0695,
"eval_samples_per_second": 20.22,
"eval_steps_per_second": 1.28,
"step": 1580
},
{
"epoch": 5.0,
"eval_bleu": 0.1545,
"eval_gen_len": 19.0,
"eval_loss": 0.7860050201416016,
"eval_runtime": 39.2946,
"eval_samples_per_second": 20.105,
"eval_steps_per_second": 1.272,
"step": 1975
},
{
"epoch": 5.06,
"learning_rate": 2.468354430379747e-05,
"loss": 0.8824,
"step": 2000
},
{
"epoch": 6.0,
"eval_bleu": 0.1756,
"eval_gen_len": 19.0,
"eval_loss": 0.7620316743850708,
"eval_runtime": 39.148,
"eval_samples_per_second": 20.18,
"eval_steps_per_second": 1.277,
"step": 2370
},
{
"epoch": 6.33,
"learning_rate": 1.8354430379746836e-05,
"loss": 0.8447,
"step": 2500
},
{
"epoch": 7.0,
"eval_bleu": 0.1705,
"eval_gen_len": 19.0,
"eval_loss": 0.7495388984680176,
"eval_runtime": 39.4044,
"eval_samples_per_second": 20.049,
"eval_steps_per_second": 1.269,
"step": 2765
},
{
"epoch": 7.59,
"learning_rate": 1.2025316455696203e-05,
"loss": 0.814,
"step": 3000
},
{
"epoch": 8.0,
"eval_bleu": 0.14,
"eval_gen_len": 19.0,
"eval_loss": 0.7383524775505066,
"eval_runtime": 39.2093,
"eval_samples_per_second": 20.148,
"eval_steps_per_second": 1.275,
"step": 3160
},
{
"epoch": 8.86,
"learning_rate": 5.69620253164557e-06,
"loss": 0.8031,
"step": 3500
},
{
"epoch": 9.0,
"eval_bleu": 0.1807,
"eval_gen_len": 19.0,
"eval_loss": 0.7330699563026428,
"eval_runtime": 39.1087,
"eval_samples_per_second": 20.2,
"eval_steps_per_second": 1.278,
"step": 3555
},
{
"epoch": 10.0,
"eval_bleu": 0.1841,
"eval_gen_len": 19.0,
"eval_loss": 0.7314567565917969,
"eval_runtime": 39.0954,
"eval_samples_per_second": 20.207,
"eval_steps_per_second": 1.279,
"step": 3950
},
{
"epoch": 10.0,
"step": 3950,
"total_flos": 2.246746162692096e+16,
"train_loss": 0.9606689144086235,
"train_runtime": 2098.7276,
"train_samples_per_second": 30.104,
"train_steps_per_second": 1.882
}
],
"max_steps": 3950,
"num_train_epochs": 10,
"total_flos": 2.246746162692096e+16,
"trial_name": null,
"trial_params": null
}