{
  "best_metric": 5.074450392391248,
  "best_model_checkpoint": "./checkpoint-1500",
  "epoch": 1.3533333333333333,
  "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 9.87983978638184e-07,
      "loss": 0.676,
      "step": 25
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.712950600801067e-07,
      "loss": 0.3062,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.546061415220292e-07,
      "loss": 0.2604,
      "step": 75
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.379172229639519e-07,
      "loss": 0.2175,
      "step": 100
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.212283044058745e-07,
      "loss": 0.2147,
      "step": 125
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.04539385847797e-07,
      "loss": 0.1825,
      "step": 150
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.885180240320427e-07,
      "loss": 0.168,
      "step": 175
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.718291054739653e-07,
      "loss": 0.1771,
      "step": 200
    },
    {
      "epoch": 0.15,
      "learning_rate": 8.551401869158879e-07,
      "loss": 0.1862,
      "step": 225
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.384512683578104e-07,
      "loss": 0.1867,
      "step": 250
    },
    {
      "epoch": 0.18,
      "learning_rate": 8.21762349799733e-07,
      "loss": 0.1652,
      "step": 275
    },
    {
      "epoch": 0.2,
      "learning_rate": 8.050734312416555e-07,
      "loss": 0.1583,
      "step": 300
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.883845126835781e-07,
      "loss": 0.1562,
      "step": 325
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.716955941255007e-07,
      "loss": 0.1607,
      "step": 350
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.550066755674232e-07,
      "loss": 0.1422,
      "step": 375
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.383177570093458e-07,
      "loss": 0.1508,
      "step": 400
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.216288384512683e-07,
      "loss": 0.1556,
      "step": 425
    },
    {
      "epoch": 0.3,
      "learning_rate": 7.049399198931909e-07,
      "loss": 0.1581,
      "step": 450
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.882510013351135e-07,
      "loss": 0.1738,
      "step": 475
    },
    {
      "epoch": 0.33,
      "learning_rate": 6.71562082777036e-07,
      "loss": 0.151,
      "step": 500
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.548731642189586e-07,
      "loss": 0.1471,
      "step": 525
    },
    {
      "epoch": 0.37,
      "learning_rate": 6.381842456608812e-07,
      "loss": 0.1491,
      "step": 550
    },
    {
      "epoch": 0.38,
      "learning_rate": 6.214953271028037e-07,
      "loss": 0.1388,
      "step": 575
    },
    {
      "epoch": 0.4,
      "learning_rate": 6.048064085447263e-07,
      "loss": 0.1521,
      "step": 600
    },
    {
      "epoch": 0.42,
      "learning_rate": 5.887850467289718e-07,
      "loss": 0.1572,
      "step": 625
    },
    {
      "epoch": 0.43,
      "learning_rate": 5.720961281708944e-07,
      "loss": 0.1577,
      "step": 650
    },
    {
      "epoch": 0.45,
      "learning_rate": 5.55407209612817e-07,
      "loss": 0.1416,
      "step": 675
    },
    {
      "epoch": 0.47,
      "learning_rate": 5.387182910547396e-07,
      "loss": 0.1375,
      "step": 700
    },
    {
      "epoch": 0.48,
      "learning_rate": 5.220293724966622e-07,
      "loss": 0.1228,
      "step": 725
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.053404539385847e-07,
      "loss": 0.1556,
      "step": 750
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.1683349609375,
      "eval_runtime": 4390.4106,
      "eval_samples_per_second": 3.535,
      "eval_steps_per_second": 0.221,
      "eval_wer": 5.095888987058885,
      "step": 750
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.886515353805072e-07,
      "loss": 0.1318,
      "step": 775
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.719626168224299e-07,
      "loss": 0.1586,
      "step": 800
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.5527369826435247e-07,
      "loss": 0.1403,
      "step": 825
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.38584779706275e-07,
      "loss": 0.144,
      "step": 850
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.225634178905207e-07,
      "loss": 0.1417,
      "step": 875
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.058744993324432e-07,
      "loss": 0.1544,
      "step": 900
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.891855807743658e-07,
      "loss": 0.1483,
      "step": 925
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.724966622162884e-07,
      "loss": 0.1469,
      "step": 950
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.558077436582109e-07,
      "loss": 0.1782,
      "step": 975
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.391188251001335e-07,
      "loss": 0.2161,
      "step": 1000
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.22429906542056e-07,
      "loss": 0.1903,
      "step": 1025
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.057409879839786e-07,
      "loss": 0.2003,
      "step": 1050
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.890520694259012e-07,
      "loss": 0.2123,
      "step": 1075
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.723631508678237e-07,
      "loss": 0.2347,
      "step": 1100
    },
    {
      "epoch": 1.1,
      "learning_rate": 2.5567423230974635e-07,
      "loss": 0.1982,
      "step": 1125
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.389853137516689e-07,
      "loss": 0.1899,
      "step": 1150
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.2229639519359144e-07,
      "loss": 0.1865,
      "step": 1175
    },
    {
      "epoch": 1.15,
      "learning_rate": 2.05607476635514e-07,
      "loss": 0.204,
      "step": 1200
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.8891855807743656e-07,
      "loss": 0.2119,
      "step": 1225
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.7222963951935914e-07,
      "loss": 0.2108,
      "step": 1250
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.555407209612817e-07,
      "loss": 0.229,
      "step": 1275
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.3885180240320425e-07,
      "loss": 0.2851,
      "step": 1300
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.221628838451268e-07,
      "loss": 0.1927,
      "step": 1325
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.054739652870494e-07,
      "loss": 0.1675,
      "step": 1350
    },
    {
      "epoch": 1.27,
      "learning_rate": 8.878504672897195e-08,
      "loss": 0.167,
      "step": 1375
    },
    {
      "epoch": 1.29,
      "learning_rate": 7.209612817089452e-08,
      "loss": 0.176,
      "step": 1400
    },
    {
      "epoch": 1.3,
      "learning_rate": 5.5407209612817086e-08,
      "loss": 0.1718,
      "step": 1425
    },
    {
      "epoch": 1.32,
      "learning_rate": 3.871829105473965e-08,
      "loss": 0.1831,
      "step": 1450
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.2029372496662217e-08,
      "loss": 0.1793,
      "step": 1475
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.3404539385847795e-09,
      "loss": 0.1732,
      "step": 1500
    },
    {
      "epoch": 1.35,
      "eval_loss": 0.164794921875,
      "eval_runtime": 4364.6715,
      "eval_samples_per_second": 3.556,
      "eval_steps_per_second": 0.222,
      "eval_wer": 5.074450392391248,
      "step": 1500
    },
    {
      "epoch": 1.35,
      "step": 1500,
      "total_flos": 1.0191220995495297e+20,
      "train_loss": 0.1855107421875,
      "train_runtime": 20533.0269,
      "train_samples_per_second": 2.338,
      "train_steps_per_second": 0.073
    }
  ],
  "max_steps": 1500,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 1.0191220995495297e+20,
  "trial_name": null,
  "trial_params": null
}