|
{ |
|
"best_metric": 26.5446, |
|
"best_model_checkpoint": "./ko-en_mbartLarge_exp5p/checkpoint-12000", |
|
"epoch": 7.424593967517401, |
|
"eval_steps": 1000, |
|
"global_step": 16000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8832, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.999580070906294e-05, |
|
"loss": 1.6447, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 20.0927, |
|
"eval_gen_len": 18.3986, |
|
"eval_loss": 1.533764362335205, |
|
"eval_runtime": 290.0589, |
|
"eval_samples_per_second": 14.852, |
|
"eval_steps_per_second": 0.931, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.998320424697532e-05, |
|
"loss": 1.5391, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.996221484543386e-05, |
|
"loss": 1.4737, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bleu": 22.6168, |
|
"eval_gen_len": 18.5462, |
|
"eval_loss": 1.4056932926177979, |
|
"eval_runtime": 288.0701, |
|
"eval_samples_per_second": 14.955, |
|
"eval_steps_per_second": 0.937, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.993283955568685e-05, |
|
"loss": 1.4048, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.989508824616534e-05, |
|
"loss": 1.3708, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bleu": 23.158, |
|
"eval_gen_len": 18.5132, |
|
"eval_loss": 1.364464521408081, |
|
"eval_runtime": 287.9921, |
|
"eval_samples_per_second": 14.959, |
|
"eval_steps_per_second": 0.938, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.984897359916788e-05, |
|
"loss": 1.3392, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.979451110660001e-05, |
|
"loss": 1.3357, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_bleu": 24.2178, |
|
"eval_gen_len": 18.4343, |
|
"eval_loss": 1.3166433572769165, |
|
"eval_runtime": 286.7402, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 0.942, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.9731719064769833e-05, |
|
"loss": 1.2855, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.966061856824153e-05, |
|
"loss": 1.2274, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_bleu": 24.8105, |
|
"eval_gen_len": 18.4761, |
|
"eval_loss": 1.2854443788528442, |
|
"eval_runtime": 287.0243, |
|
"eval_samples_per_second": 15.009, |
|
"eval_steps_per_second": 0.941, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.958123350274877e-05, |
|
"loss": 1.2255, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.949359053717043e-05, |
|
"loss": 1.2113, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_bleu": 25.4518, |
|
"eval_gen_len": 18.2672, |
|
"eval_loss": 1.2622113227844238, |
|
"eval_runtime": 282.5378, |
|
"eval_samples_per_second": 15.248, |
|
"eval_steps_per_second": 0.956, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.9397719114571386e-05, |
|
"loss": 1.2059, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.929365144231132e-05, |
|
"loss": 1.1392, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_bleu": 25.6184, |
|
"eval_gen_len": 18.4032, |
|
"eval_loss": 1.253997802734375, |
|
"eval_runtime": 282.541, |
|
"eval_samples_per_second": 15.247, |
|
"eval_steps_per_second": 0.956, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.918142248122488e-05, |
|
"loss": 1.1278, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 4.906106993387679e-05, |
|
"loss": 1.125, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_bleu": 25.3848, |
|
"eval_gen_len": 18.3781, |
|
"eval_loss": 1.2400809526443481, |
|
"eval_runtime": 282.7298, |
|
"eval_samples_per_second": 15.237, |
|
"eval_steps_per_second": 0.955, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.893263423189597e-05, |
|
"loss": 1.1295, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.879615852239274e-05, |
|
"loss": 1.0423, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_bleu": 25.9776, |
|
"eval_gen_len": 18.3387, |
|
"eval_loss": 1.2353969812393188, |
|
"eval_runtime": 282.3157, |
|
"eval_samples_per_second": 15.26, |
|
"eval_steps_per_second": 0.956, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 4.865168865346393e-05, |
|
"loss": 0.9937, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 4.849927315879044e-05, |
|
"loss": 1.011, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_bleu": 26.1619, |
|
"eval_gen_len": 18.4858, |
|
"eval_loss": 1.2417948246002197, |
|
"eval_runtime": 286.4621, |
|
"eval_samples_per_second": 15.039, |
|
"eval_steps_per_second": 0.943, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 4.833896324133269e-05, |
|
"loss": 1.0171, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 4.817081275612937e-05, |
|
"loss": 0.9493, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_bleu": 25.6398, |
|
"eval_gen_len": 18.2273, |
|
"eval_loss": 1.2616289854049683, |
|
"eval_runtime": 280.5938, |
|
"eval_samples_per_second": 15.353, |
|
"eval_steps_per_second": 0.962, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 4.799487819220517e-05, |
|
"loss": 0.8973, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.781121865359366e-05, |
|
"loss": 0.888, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_bleu": 26.5446, |
|
"eval_gen_len": 18.438, |
|
"eval_loss": 1.2328206300735474, |
|
"eval_runtime": 287.1698, |
|
"eval_samples_per_second": 15.002, |
|
"eval_steps_per_second": 0.94, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 4.761989583948173e-05, |
|
"loss": 0.8919, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.7420974023482126e-05, |
|
"loss": 0.8648, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_bleu": 26.0371, |
|
"eval_gen_len": 18.4074, |
|
"eval_loss": 1.2618447542190552, |
|
"eval_runtime": 284.5653, |
|
"eval_samples_per_second": 15.139, |
|
"eval_steps_per_second": 0.949, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 4.721452003204118e-05, |
|
"loss": 0.7633, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 4.700060322198889e-05, |
|
"loss": 0.776, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_bleu": 26.0043, |
|
"eval_gen_len": 18.4629, |
|
"eval_loss": 1.2669389247894287, |
|
"eval_runtime": 283.3474, |
|
"eval_samples_per_second": 15.204, |
|
"eval_steps_per_second": 0.953, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 4.6779295457239025e-05, |
|
"loss": 0.7702, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 4.6550671084646823e-05, |
|
"loss": 0.7856, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_bleu": 26.2716, |
|
"eval_gen_len": 18.403, |
|
"eval_loss": 1.2591649293899536, |
|
"eval_runtime": 283.0087, |
|
"eval_samples_per_second": 15.222, |
|
"eval_steps_per_second": 0.954, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 4.6314806909032766e-05, |
|
"loss": 0.6933, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 4.607178216738045e-05, |
|
"loss": 0.6997, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"eval_bleu": 25.7842, |
|
"eval_gen_len": 18.3693, |
|
"eval_loss": 1.3154139518737793, |
|
"eval_runtime": 288.6701, |
|
"eval_samples_per_second": 14.924, |
|
"eval_steps_per_second": 0.935, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"step": 16000, |
|
"total_flos": 5.548892425520415e+17, |
|
"train_loss": 1.1159878959655762, |
|
"train_runtime": 16243.1142, |
|
"train_samples_per_second": 84.895, |
|
"train_steps_per_second": 5.307 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 86200, |
|
"num_train_epochs": 40, |
|
"save_steps": 1000, |
|
"total_flos": 5.548892425520415e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|