|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 2352, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 13.50642145973043, |
|
"eval_loss": 1.9098774194717407, |
|
"eval_runtime": 18.3505, |
|
"eval_samples_per_second": 1.199, |
|
"eval_steps_per_second": 1.199, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 15.388992406642911, |
|
"eval_loss": 1.5389823913574219, |
|
"eval_runtime": 17.9949, |
|
"eval_samples_per_second": 1.223, |
|
"eval_steps_per_second": 1.223, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.5510204081632653, |
|
"grad_norm": 2.0644779205322266, |
|
"learning_rate": 7.891156462585034e-05, |
|
"loss": 2.4223, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 16.361746081538808, |
|
"eval_loss": 1.4969135522842407, |
|
"eval_runtime": 18.102, |
|
"eval_samples_per_second": 1.215, |
|
"eval_steps_per_second": 1.215, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 16.192662011002973, |
|
"eval_loss": 1.558351993560791, |
|
"eval_runtime": 18.0485, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 1.219, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 20.388609774897613, |
|
"eval_loss": 1.6301382780075073, |
|
"eval_runtime": 17.8816, |
|
"eval_samples_per_second": 1.23, |
|
"eval_steps_per_second": 1.23, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.1020408163265305, |
|
"grad_norm": 3.387509822845459, |
|
"learning_rate": 5.7653061224489805e-05, |
|
"loss": 0.5615, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 20.820547846052477, |
|
"eval_loss": 1.6597435474395752, |
|
"eval_runtime": 18.4966, |
|
"eval_samples_per_second": 1.189, |
|
"eval_steps_per_second": 1.189, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 20.333903309139888, |
|
"eval_loss": 1.710249662399292, |
|
"eval_runtime": 17.8589, |
|
"eval_samples_per_second": 1.232, |
|
"eval_steps_per_second": 1.232, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 7.653061224489796, |
|
"grad_norm": 2.134899377822876, |
|
"learning_rate": 3.639455782312925e-05, |
|
"loss": 0.2154, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 19.92413260385578, |
|
"eval_loss": 1.7504385709762573, |
|
"eval_runtime": 19.3501, |
|
"eval_samples_per_second": 1.137, |
|
"eval_steps_per_second": 1.137, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 21.495754342436005, |
|
"eval_loss": 1.7941405773162842, |
|
"eval_runtime": 18.5215, |
|
"eval_samples_per_second": 1.188, |
|
"eval_steps_per_second": 1.188, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 22.143240405081055, |
|
"eval_loss": 1.8230061531066895, |
|
"eval_runtime": 19.4583, |
|
"eval_samples_per_second": 1.131, |
|
"eval_steps_per_second": 1.131, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 10.204081632653061, |
|
"grad_norm": 0.1605977714061737, |
|
"learning_rate": 1.5136054421768709e-05, |
|
"loss": 0.1043, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 21.334082933744533, |
|
"eval_loss": 1.8319286108016968, |
|
"eval_runtime": 18.1893, |
|
"eval_samples_per_second": 1.21, |
|
"eval_steps_per_second": 1.21, |
|
"step": 2156 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2352, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 637128753020928.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|