|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 3950, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.0232, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0572127103805542, |
|
"eval_runtime": 39.011, |
|
"eval_samples_per_second": 20.251, |
|
"eval_steps_per_second": 1.282, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.367088607594937e-05, |
|
"loss": 1.5347, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.1035, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9248576760292053, |
|
"eval_runtime": 39.2027, |
|
"eval_samples_per_second": 20.152, |
|
"eval_steps_per_second": 1.275, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.7341772151898736e-05, |
|
"loss": 1.0539, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.1313, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.854935884475708, |
|
"eval_runtime": 39.1519, |
|
"eval_samples_per_second": 20.178, |
|
"eval_steps_per_second": 1.277, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.10126582278481e-05, |
|
"loss": 0.9461, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.1392, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8109087347984314, |
|
"eval_runtime": 39.0695, |
|
"eval_samples_per_second": 20.22, |
|
"eval_steps_per_second": 1.28, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.1545, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7860050201416016, |
|
"eval_runtime": 39.2946, |
|
"eval_samples_per_second": 20.105, |
|
"eval_steps_per_second": 1.272, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.468354430379747e-05, |
|
"loss": 0.8824, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.1756, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7620316743850708, |
|
"eval_runtime": 39.148, |
|
"eval_samples_per_second": 20.18, |
|
"eval_steps_per_second": 1.277, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8354430379746836e-05, |
|
"loss": 0.8447, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.1705, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7495388984680176, |
|
"eval_runtime": 39.4044, |
|
"eval_samples_per_second": 20.049, |
|
"eval_steps_per_second": 1.269, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 1.2025316455696203e-05, |
|
"loss": 0.814, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.14, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7383524775505066, |
|
"eval_runtime": 39.2093, |
|
"eval_samples_per_second": 20.148, |
|
"eval_steps_per_second": 1.275, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 5.69620253164557e-06, |
|
"loss": 0.8031, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.1807, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7330699563026428, |
|
"eval_runtime": 39.1087, |
|
"eval_samples_per_second": 20.2, |
|
"eval_steps_per_second": 1.278, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.1841, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7314567565917969, |
|
"eval_runtime": 39.0954, |
|
"eval_samples_per_second": 20.207, |
|
"eval_steps_per_second": 1.279, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3950, |
|
"total_flos": 2.246746162692096e+16, |
|
"train_loss": 0.9606689144086235, |
|
"train_runtime": 2098.7276, |
|
"train_samples_per_second": 30.104, |
|
"train_steps_per_second": 1.882 |
|
} |
|
], |
|
"max_steps": 3950, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.246746162692096e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|