|
{ |
|
"best_metric": 1.4880632162094116, |
|
"best_model_checkpoint": "./zhko_mbartLarge_19p_run1/checkpoint-5000", |
|
"epoch": 8.973438621679827, |
|
"eval_steps": 5000, |
|
"global_step": 25000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.4329, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.9318, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8063, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.9379498634897e-05, |
|
"loss": 1.7429, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.875899726979399e-05, |
|
"loss": 1.6586, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.8138495904690995e-05, |
|
"loss": 1.5945, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.751799453958799e-05, |
|
"loss": 1.4888, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.689749317448498e-05, |
|
"loss": 1.3603, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.627699180938198e-05, |
|
"loss": 1.2398, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.5656490444278984e-05, |
|
"loss": 1.1911, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_bleu": 9.7977, |
|
"eval_gen_len": 14.6128, |
|
"eval_loss": 1.4880632162094116, |
|
"eval_runtime": 656.7417, |
|
"eval_samples_per_second": 16.966, |
|
"eval_steps_per_second": 1.061, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.5035989079175975e-05, |
|
"loss": 1.1667, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.441548771407297e-05, |
|
"loss": 1.129, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.379498634896997e-05, |
|
"loss": 1.048, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.317448498386697e-05, |
|
"loss": 0.9477, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.2553983618763964e-05, |
|
"loss": 0.8805, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.193348225366096e-05, |
|
"loss": 0.8654, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.131298088855796e-05, |
|
"loss": 0.8357, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.069247952345495e-05, |
|
"loss": 0.7993, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.0071978158351954e-05, |
|
"loss": 0.7272, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 3.945147679324895e-05, |
|
"loss": 0.6536, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_bleu": 13.3897, |
|
"eval_gen_len": 14.9179, |
|
"eval_loss": 1.6061058044433594, |
|
"eval_runtime": 661.3374, |
|
"eval_samples_per_second": 16.848, |
|
"eval_steps_per_second": 1.054, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 3.883097542814594e-05, |
|
"loss": 0.6277, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 3.821047406304294e-05, |
|
"loss": 0.6063, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.758997269793994e-05, |
|
"loss": 0.589, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 3.6969471332836934e-05, |
|
"loss": 0.5473, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 3.634896996773393e-05, |
|
"loss": 0.4878, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.572846860263093e-05, |
|
"loss": 0.4455, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.510796723752792e-05, |
|
"loss": 0.4362, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 3.4487465872424924e-05, |
|
"loss": 0.4156, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 3.386696450732192e-05, |
|
"loss": 0.4034, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.324646314221891e-05, |
|
"loss": 0.3665, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_bleu": 14.0018, |
|
"eval_gen_len": 15.2051, |
|
"eval_loss": 1.7928513288497925, |
|
"eval_runtime": 662.4842, |
|
"eval_samples_per_second": 16.819, |
|
"eval_steps_per_second": 1.052, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 3.262596177711591e-05, |
|
"loss": 0.3223, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 3.200546041201291e-05, |
|
"loss": 0.3039, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 3.1384959046909904e-05, |
|
"loss": 0.2964, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 3.07644576818069e-05, |
|
"loss": 0.2834, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 3.01439563167039e-05, |
|
"loss": 0.2662, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 2.9523454951600893e-05, |
|
"loss": 0.2376, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 2.8902953586497894e-05, |
|
"loss": 0.2152, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 2.828245222139489e-05, |
|
"loss": 0.2076, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 2.7661950856291885e-05, |
|
"loss": 0.1995, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 2.704144949118888e-05, |
|
"loss": 0.194, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_bleu": 14.7102, |
|
"eval_gen_len": 14.7308, |
|
"eval_loss": 1.9398521184921265, |
|
"eval_runtime": 638.6619, |
|
"eval_samples_per_second": 17.446, |
|
"eval_steps_per_second": 1.091, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 2.6420948126085876e-05, |
|
"loss": 0.1763, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 2.5800446760982877e-05, |
|
"loss": 0.1566, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 2.517994539587987e-05, |
|
"loss": 0.1471, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 2.455944403077687e-05, |
|
"loss": 0.1462, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 2.3938942665673866e-05, |
|
"loss": 0.1364, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 2.3318441300570863e-05, |
|
"loss": 0.1297, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 2.269793993546786e-05, |
|
"loss": 0.1174, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 2.2077438570364855e-05, |
|
"loss": 0.1075, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.1456937205261852e-05, |
|
"loss": 0.1041, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 2.083643584015885e-05, |
|
"loss": 0.1004, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_bleu": 14.9684, |
|
"eval_gen_len": 14.8811, |
|
"eval_loss": 2.0678608417510986, |
|
"eval_runtime": 646.6568, |
|
"eval_samples_per_second": 17.23, |
|
"eval_steps_per_second": 1.078, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"step": 25000, |
|
"total_flos": 1.7337038143488e+18, |
|
"train_loss": 0.6854593217468262, |
|
"train_runtime": 45689.3015, |
|
"train_samples_per_second": 29.267, |
|
"train_steps_per_second": 0.915 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 41790, |
|
"num_train_epochs": 15, |
|
"save_steps": 5000, |
|
"total_flos": 1.7337038143488e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|