{ "best_metric": 26.5446, "best_model_checkpoint": "./ko-en_mbartLarge_exp5p/checkpoint-12000", "epoch": 7.424593967517401, "eval_steps": 1000, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "learning_rate": 5e-05, "loss": 1.8832, "step": 500 }, { "epoch": 0.46, "learning_rate": 4.999580070906294e-05, "loss": 1.6447, "step": 1000 }, { "epoch": 0.46, "eval_bleu": 20.0927, "eval_gen_len": 18.3986, "eval_loss": 1.533764362335205, "eval_runtime": 290.0589, "eval_samples_per_second": 14.852, "eval_steps_per_second": 0.931, "step": 1000 }, { "epoch": 0.7, "learning_rate": 4.998320424697532e-05, "loss": 1.5391, "step": 1500 }, { "epoch": 0.93, "learning_rate": 4.996221484543386e-05, "loss": 1.4737, "step": 2000 }, { "epoch": 0.93, "eval_bleu": 22.6168, "eval_gen_len": 18.5462, "eval_loss": 1.4056932926177979, "eval_runtime": 288.0701, "eval_samples_per_second": 14.955, "eval_steps_per_second": 0.937, "step": 2000 }, { "epoch": 1.16, "learning_rate": 4.993283955568685e-05, "loss": 1.4048, "step": 2500 }, { "epoch": 1.39, "learning_rate": 4.989508824616534e-05, "loss": 1.3708, "step": 3000 }, { "epoch": 1.39, "eval_bleu": 23.158, "eval_gen_len": 18.5132, "eval_loss": 1.364464521408081, "eval_runtime": 287.9921, "eval_samples_per_second": 14.959, "eval_steps_per_second": 0.938, "step": 3000 }, { "epoch": 1.62, "learning_rate": 4.984897359916788e-05, "loss": 1.3392, "step": 3500 }, { "epoch": 1.86, "learning_rate": 4.979451110660001e-05, "loss": 1.3357, "step": 4000 }, { "epoch": 1.86, "eval_bleu": 24.2178, "eval_gen_len": 18.4343, "eval_loss": 1.3166433572769165, "eval_runtime": 286.7402, "eval_samples_per_second": 15.024, "eval_steps_per_second": 0.942, "step": 4000 }, { "epoch": 2.09, "learning_rate": 4.9731719064769833e-05, "loss": 1.2855, "step": 4500 }, { "epoch": 2.32, "learning_rate": 4.966061856824153e-05, "loss": 1.2274, "step": 5000 }, { "epoch": 2.32, "eval_bleu": 24.8105, "eval_gen_len": 18.4761, "eval_loss": 1.2854443788528442, "eval_runtime": 287.0243, "eval_samples_per_second": 15.009, "eval_steps_per_second": 0.941, "step": 5000 }, { "epoch": 2.55, "learning_rate": 4.958123350274877e-05, "loss": 1.2255, "step": 5500 }, { "epoch": 2.78, "learning_rate": 4.949359053717043e-05, "loss": 1.2113, "step": 6000 }, { "epoch": 2.78, "eval_bleu": 25.4518, "eval_gen_len": 18.2672, "eval_loss": 1.2622113227844238, "eval_runtime": 282.5378, "eval_samples_per_second": 15.248, "eval_steps_per_second": 0.956, "step": 6000 }, { "epoch": 3.02, "learning_rate": 4.9397719114571386e-05, "loss": 1.2059, "step": 6500 }, { "epoch": 3.25, "learning_rate": 4.929365144231132e-05, "loss": 1.1392, "step": 7000 }, { "epoch": 3.25, "eval_bleu": 25.6184, "eval_gen_len": 18.4032, "eval_loss": 1.253997802734375, "eval_runtime": 282.541, "eval_samples_per_second": 15.247, "eval_steps_per_second": 0.956, "step": 7000 }, { "epoch": 3.48, "learning_rate": 4.918142248122488e-05, "loss": 1.1278, "step": 7500 }, { "epoch": 3.71, "learning_rate": 4.906106993387679e-05, "loss": 1.125, "step": 8000 }, { "epoch": 3.71, "eval_bleu": 25.3848, "eval_gen_len": 18.3781, "eval_loss": 1.2400809526443481, "eval_runtime": 282.7298, "eval_samples_per_second": 15.237, "eval_steps_per_second": 0.955, "step": 8000 }, { "epoch": 3.94, "learning_rate": 4.893263423189597e-05, "loss": 1.1295, "step": 8500 }, { "epoch": 4.18, "learning_rate": 4.879615852239274e-05, "loss": 1.0423, "step": 9000 }, { "epoch": 4.18, "eval_bleu": 25.9776, "eval_gen_len": 18.3387, "eval_loss": 1.2353969812393188, "eval_runtime": 282.3157, "eval_samples_per_second": 15.26, "eval_steps_per_second": 0.956, "step": 9000 }, { "epoch": 4.41, "learning_rate": 4.865168865346393e-05, "loss": 0.9937, "step": 9500 }, { "epoch": 4.64, "learning_rate": 4.849927315879044e-05, "loss": 1.011, "step": 10000 }, { "epoch": 4.64, "eval_bleu": 26.1619, "eval_gen_len": 18.4858, "eval_loss": 1.2417948246002197, "eval_runtime": 286.4621, "eval_samples_per_second": 15.039, "eval_steps_per_second": 0.943, "step": 10000 }, { "epoch": 4.87, "learning_rate": 4.833896324133269e-05, "loss": 1.0171, "step": 10500 }, { "epoch": 5.1, "learning_rate": 4.817081275612937e-05, "loss": 0.9493, "step": 11000 }, { "epoch": 5.1, "eval_bleu": 25.6398, "eval_gen_len": 18.2273, "eval_loss": 1.2616289854049683, "eval_runtime": 280.5938, "eval_samples_per_second": 15.353, "eval_steps_per_second": 0.962, "step": 11000 }, { "epoch": 5.34, "learning_rate": 4.799487819220517e-05, "loss": 0.8973, "step": 11500 }, { "epoch": 5.57, "learning_rate": 4.781121865359366e-05, "loss": 0.888, "step": 12000 }, { "epoch": 5.57, "eval_bleu": 26.5446, "eval_gen_len": 18.438, "eval_loss": 1.2328206300735474, "eval_runtime": 287.1698, "eval_samples_per_second": 15.002, "eval_steps_per_second": 0.94, "step": 12000 }, { "epoch": 5.8, "learning_rate": 4.761989583948173e-05, "loss": 0.8919, "step": 12500 }, { "epoch": 6.03, "learning_rate": 4.7420974023482126e-05, "loss": 0.8648, "step": 13000 }, { "epoch": 6.03, "eval_bleu": 26.0371, "eval_gen_len": 18.4074, "eval_loss": 1.2618447542190552, "eval_runtime": 284.5653, "eval_samples_per_second": 15.139, "eval_steps_per_second": 0.949, "step": 13000 }, { "epoch": 6.26, "learning_rate": 4.721452003204118e-05, "loss": 0.7633, "step": 13500 }, { "epoch": 6.5, "learning_rate": 4.700060322198889e-05, "loss": 0.776, "step": 14000 }, { "epoch": 6.5, "eval_bleu": 26.0043, "eval_gen_len": 18.4629, "eval_loss": 1.2669389247894287, "eval_runtime": 283.3474, "eval_samples_per_second": 15.204, "eval_steps_per_second": 0.953, "step": 14000 }, { "epoch": 6.73, "learning_rate": 4.6779295457239025e-05, "loss": 0.7702, "step": 14500 }, { "epoch": 6.96, "learning_rate": 4.6550671084646823e-05, "loss": 0.7856, "step": 15000 }, { "epoch": 6.96, "eval_bleu": 26.2716, "eval_gen_len": 18.403, "eval_loss": 1.2591649293899536, "eval_runtime": 283.0087, "eval_samples_per_second": 15.222, "eval_steps_per_second": 0.954, "step": 15000 }, { "epoch": 7.19, "learning_rate": 4.6314806909032766e-05, "loss": 0.6933, "step": 15500 }, { "epoch": 7.42, "learning_rate": 4.607178216738045e-05, "loss": 0.6997, "step": 16000 }, { "epoch": 7.42, "eval_bleu": 25.7842, "eval_gen_len": 18.3693, "eval_loss": 1.3154139518737793, "eval_runtime": 288.6701, "eval_samples_per_second": 14.924, "eval_steps_per_second": 0.935, "step": 16000 }, { "epoch": 7.42, "step": 16000, "total_flos": 5.548892425520415e+17, "train_loss": 1.1159878959655762, "train_runtime": 16243.1142, "train_samples_per_second": 84.895, "train_steps_per_second": 5.307 } ], "logging_steps": 500, "max_steps": 86200, "num_train_epochs": 40, "save_steps": 1000, "total_flos": 5.548892425520415e+17, "trial_name": null, "trial_params": null }