{
  "best_metric": 4.878868882500295,
  "best_model_checkpoint": "./checkpoint-1000",
  "epoch": 1.1523333333333334,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.6013176087541454e-06,
      "loss": 0.8269,
      "step": 25
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0137381186086706e-06,
      "loss": 0.3045,
      "step": 50
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.2420279328577924e-06,
      "loss": 0.2313,
      "step": 75
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.40069543942923e-06,
      "loss": 0.1914,
      "step": 100
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.522426425046553e-06,
      "loss": 0.1792,
      "step": 125
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.61759653240662e-06,
      "loss": 0.1751,
      "step": 150
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.701259274807719e-06,
      "loss": 0.1753,
      "step": 175
    },
    {
      "epoch": 0.07,
      "learning_rate": 2.773422504448636e-06,
      "loss": 0.1429,
      "step": 200
    },
    {
      "epoch": 0.07,
      "learning_rate": 2.836868816606194e-06,
      "loss": 0.1708,
      "step": 225
    },
    {
      "epoch": 0.08,
      "learning_rate": 2.893478968555679e-06,
      "loss": 0.156,
      "step": 250
    },
    {
      "epoch": 0.09,
      "learning_rate": 2.944583870724746e-06,
      "loss": 0.1573,
      "step": 275
    },
    {
      "epoch": 0.1,
      "learning_rate": 2.991160014398659e-06,
      "loss": 0.157,
      "step": 300
    },
    {
      "epoch": 0.11,
      "learning_rate": 2.978888888888889e-06,
      "loss": 0.1659,
      "step": 325
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.9511111111111114e-06,
      "loss": 0.1362,
      "step": 350
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.9233333333333334e-06,
      "loss": 0.1465,
      "step": 375
    },
    {
      "epoch": 0.13,
      "learning_rate": 2.8955555555555555e-06,
      "loss": 0.1432,
      "step": 400
    },
    {
      "epoch": 0.14,
      "learning_rate": 2.867777777777778e-06,
      "loss": 0.1306,
      "step": 425
    },
    {
      "epoch": 0.15,
      "learning_rate": 2.84e-06,
      "loss": 0.1388,
      "step": 450
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.812222222222222e-06,
      "loss": 0.1434,
      "step": 475
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.7844444444444444e-06,
      "loss": 0.1425,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.756666666666667e-06,
      "loss": 0.1411,
      "step": 525
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7288888888888893e-06,
      "loss": 0.1316,
      "step": 550
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.702222222222222e-06,
      "loss": 0.1212,
      "step": 575
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.674444444444444e-06,
      "loss": 0.1117,
      "step": 600
    },
    {
      "epoch": 1.03,
      "learning_rate": 2.6466666666666666e-06,
      "loss": 0.1,
      "step": 625
    },
    {
      "epoch": 1.04,
      "learning_rate": 2.618888888888889e-06,
      "loss": 0.0931,
      "step": 650
    },
    {
      "epoch": 1.04,
      "learning_rate": 2.5911111111111115e-06,
      "loss": 0.0876,
      "step": 675
    },
    {
      "epoch": 1.05,
      "learning_rate": 2.5633333333333335e-06,
      "loss": 0.0943,
      "step": 700
    },
    {
      "epoch": 1.06,
      "learning_rate": 2.5355555555555555e-06,
      "loss": 0.0866,
      "step": 725
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.507777777777778e-06,
      "loss": 0.0817,
      "step": 750
    },
    {
      "epoch": 1.08,
      "learning_rate": 2.48e-06,
      "loss": 0.0977,
      "step": 775
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.452222222222222e-06,
      "loss": 0.0878,
      "step": 800
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.4244444444444444e-06,
      "loss": 0.0819,
      "step": 825
    },
    {
      "epoch": 1.1,
      "learning_rate": 2.396666666666667e-06,
      "loss": 0.0818,
      "step": 850
    },
    {
      "epoch": 1.11,
      "learning_rate": 2.3688888888888893e-06,
      "loss": 0.0829,
      "step": 875
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.3411111111111113e-06,
      "loss": 0.0801,
      "step": 900
    },
    {
      "epoch": 1.13,
      "learning_rate": 2.3133333333333333e-06,
      "loss": 0.0743,
      "step": 925
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.285555555555556e-06,
      "loss": 0.0758,
      "step": 950
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.257777777777778e-06,
      "loss": 0.0771,
      "step": 975
    },
    {
      "epoch": 1.15,
      "learning_rate": 2.23e-06,
      "loss": 0.0774,
      "step": 1000
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.148193359375,
      "eval_runtime": 2293.5239,
      "eval_samples_per_second": 3.79,
      "eval_steps_per_second": 0.237,
      "eval_wer": 4.878868882500295,
      "step": 1000
    }
  ],
  "max_steps": 3000,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 6.794147330330198e+19,
  "trial_name": null,
  "trial_params": null
}