byt5-base-es_cbv / trainer_state.json
mekjr1's picture
End of training
e2b50a4
raw
history blame
4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 3860,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.0256,
"eval_gen_len": 19.0,
"eval_loss": 0.9372363090515137,
"eval_runtime": 38.1578,
"eval_samples_per_second": 20.206,
"eval_steps_per_second": 1.284,
"step": 386
},
{
"epoch": 1.3,
"learning_rate": 4.352331606217617e-05,
"loss": 1.4765,
"step": 500
},
{
"epoch": 2.0,
"eval_bleu": 0.0303,
"eval_gen_len": 19.0,
"eval_loss": 0.8219472169876099,
"eval_runtime": 38.1767,
"eval_samples_per_second": 20.196,
"eval_steps_per_second": 1.284,
"step": 772
},
{
"epoch": 2.59,
"learning_rate": 3.704663212435233e-05,
"loss": 0.9443,
"step": 1000
},
{
"epoch": 3.0,
"eval_bleu": 0.022,
"eval_gen_len": 19.0,
"eval_loss": 0.7708175778388977,
"eval_runtime": 37.8173,
"eval_samples_per_second": 20.388,
"eval_steps_per_second": 1.296,
"step": 1158
},
{
"epoch": 3.89,
"learning_rate": 3.05699481865285e-05,
"loss": 0.8436,
"step": 1500
},
{
"epoch": 4.0,
"eval_bleu": 0.0562,
"eval_gen_len": 19.0,
"eval_loss": 0.7287958860397339,
"eval_runtime": 38.1538,
"eval_samples_per_second": 20.208,
"eval_steps_per_second": 1.284,
"step": 1544
},
{
"epoch": 5.0,
"eval_bleu": 0.0469,
"eval_gen_len": 19.0,
"eval_loss": 0.7091419696807861,
"eval_runtime": 37.9015,
"eval_samples_per_second": 20.342,
"eval_steps_per_second": 1.293,
"step": 1930
},
{
"epoch": 5.18,
"learning_rate": 2.4093264248704665e-05,
"loss": 0.7916,
"step": 2000
},
{
"epoch": 6.0,
"eval_bleu": 0.0464,
"eval_gen_len": 19.0,
"eval_loss": 0.6926471590995789,
"eval_runtime": 38.039,
"eval_samples_per_second": 20.269,
"eval_steps_per_second": 1.288,
"step": 2316
},
{
"epoch": 6.48,
"learning_rate": 1.761658031088083e-05,
"loss": 0.7568,
"step": 2500
},
{
"epoch": 7.0,
"eval_bleu": 0.0536,
"eval_gen_len": 19.0,
"eval_loss": 0.6799036860466003,
"eval_runtime": 38.099,
"eval_samples_per_second": 20.237,
"eval_steps_per_second": 1.286,
"step": 2702
},
{
"epoch": 7.77,
"learning_rate": 1.1139896373056995e-05,
"loss": 0.7403,
"step": 3000
},
{
"epoch": 8.0,
"eval_bleu": 0.0547,
"eval_gen_len": 19.0,
"eval_loss": 0.6737177968025208,
"eval_runtime": 37.9859,
"eval_samples_per_second": 20.297,
"eval_steps_per_second": 1.29,
"step": 3088
},
{
"epoch": 9.0,
"eval_bleu": 0.0532,
"eval_gen_len": 19.0,
"eval_loss": 0.6666902303695679,
"eval_runtime": 38.1676,
"eval_samples_per_second": 20.2,
"eval_steps_per_second": 1.284,
"step": 3474
},
{
"epoch": 9.07,
"learning_rate": 4.663212435233161e-06,
"loss": 0.7215,
"step": 3500
},
{
"epoch": 10.0,
"eval_bleu": 0.056,
"eval_gen_len": 19.0,
"eval_loss": 0.6674807667732239,
"eval_runtime": 38.091,
"eval_samples_per_second": 20.241,
"eval_steps_per_second": 1.286,
"step": 3860
},
{
"epoch": 10.0,
"step": 3860,
"total_flos": 2.192693390180352e+16,
"train_loss": 0.8798027631532342,
"train_runtime": 2043.5708,
"train_samples_per_second": 30.173,
"train_steps_per_second": 1.889
}
],
"max_steps": 3860,
"num_train_epochs": 10,
"total_flos": 2.192693390180352e+16,
"trial_name": null,
"trial_params": null
}