|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.6775106082036775, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14144271570014144, |
|
"eval_loss": 3.605041265487671, |
|
"eval_runtime": 151.6851, |
|
"eval_samples_per_second": 37.288, |
|
"eval_steps_per_second": 4.661, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2828854314002829, |
|
"eval_loss": 2.980067491531372, |
|
"eval_runtime": 149.8181, |
|
"eval_samples_per_second": 37.752, |
|
"eval_steps_per_second": 4.719, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 2.634444236755371, |
|
"eval_runtime": 149.2391, |
|
"eval_samples_per_second": 37.899, |
|
"eval_steps_per_second": 4.737, |
|
"eval_wer": 0.9829243632745422, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"eval_loss": 1.1875977516174316, |
|
"eval_runtime": 149.4882, |
|
"eval_samples_per_second": 37.836, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.8078348927155719, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 1.7146128416061401, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.377, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"eval_loss": 1.006009578704834, |
|
"eval_runtime": 150.0094, |
|
"eval_samples_per_second": 37.704, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.7693826130217779, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 0.8393504619598389, |
|
"eval_runtime": 150.017, |
|
"eval_samples_per_second": 37.702, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.6380895828986857, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"eval_loss": 0.7753380537033081, |
|
"eval_runtime": 150.3627, |
|
"eval_samples_per_second": 37.616, |
|
"eval_steps_per_second": 4.702, |
|
"eval_wer": 0.5870552550913963, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"eval_loss": 0.6410965919494629, |
|
"eval_runtime": 150.6587, |
|
"eval_samples_per_second": 37.542, |
|
"eval_steps_per_second": 4.693, |
|
"eval_wer": 0.4923207780327711, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 0.632188618183136, |
|
"eval_runtime": 150.208, |
|
"eval_samples_per_second": 37.654, |
|
"eval_steps_per_second": 4.707, |
|
"eval_wer": 0.5022869156328739, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 0.9841262102127075, |
|
"learning_rate": 0.00022928571428571426, |
|
"loss": 0.8318, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"eval_loss": 0.5857909321784973, |
|
"eval_runtime": 151.3029, |
|
"eval_samples_per_second": 37.382, |
|
"eval_steps_per_second": 4.673, |
|
"eval_wer": 0.4563720691370705, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5558698727015559, |
|
"eval_loss": 0.5580226182937622, |
|
"eval_runtime": 150.4331, |
|
"eval_samples_per_second": 37.598, |
|
"eval_steps_per_second": 4.7, |
|
"eval_wer": 0.43342267015454733, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 0.5395969748497009, |
|
"eval_runtime": 152.4347, |
|
"eval_samples_per_second": 37.104, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.4204073117106129, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8387553041018387, |
|
"eval_loss": 0.5091754794120789, |
|
"eval_runtime": 150.7597, |
|
"eval_samples_per_second": 37.517, |
|
"eval_steps_per_second": 4.69, |
|
"eval_wer": 0.4033316749851551, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"eval_loss": 0.4941596984863281, |
|
"eval_runtime": 150.8584, |
|
"eval_samples_per_second": 37.492, |
|
"eval_steps_per_second": 4.687, |
|
"eval_wer": 0.39033236507197766, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 0.8058004379272461, |
|
"learning_rate": 0.0001582857142857143, |
|
"loss": 0.6486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 0.4773481488227844, |
|
"eval_runtime": 151.2881, |
|
"eval_samples_per_second": 37.386, |
|
"eval_steps_per_second": 4.673, |
|
"eval_wer": 0.37783055961226747, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"eval_loss": 0.472769170999527, |
|
"eval_runtime": 152.6271, |
|
"eval_samples_per_second": 37.058, |
|
"eval_steps_per_second": 4.632, |
|
"eval_wer": 0.3649756864759031, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4045261669024045, |
|
"eval_loss": 0.46484073996543884, |
|
"eval_runtime": 150.7659, |
|
"eval_samples_per_second": 37.515, |
|
"eval_steps_per_second": 4.689, |
|
"eval_wer": 0.3623276789009966, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 0.4558813273906708, |
|
"eval_runtime": 151.5484, |
|
"eval_samples_per_second": 37.321, |
|
"eval_steps_per_second": 4.665, |
|
"eval_wer": 0.3528429972236042, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6874115983026874, |
|
"eval_loss": 0.4480243921279907, |
|
"eval_runtime": 151.9127, |
|
"eval_samples_per_second": 37.232, |
|
"eval_steps_per_second": 4.654, |
|
"eval_wer": 0.3526825119160341, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 0.6075835227966309, |
|
"learning_rate": 8.728571428571428e-05, |
|
"loss": 0.5049, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"eval_loss": 0.438320130109787, |
|
"eval_runtime": 151.7706, |
|
"eval_samples_per_second": 37.267, |
|
"eval_steps_per_second": 4.658, |
|
"eval_wer": 0.3383511739500249, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 0.4345419108867645, |
|
"eval_runtime": 151.6805, |
|
"eval_samples_per_second": 37.289, |
|
"eval_steps_per_second": 4.661, |
|
"eval_wer": 0.33549453547527724, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1117397454031117, |
|
"eval_loss": 0.4344736337661743, |
|
"eval_runtime": 151.8533, |
|
"eval_samples_per_second": 37.246, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.3300219864871371, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.253182461103253, |
|
"eval_loss": 0.42980891466140747, |
|
"eval_runtime": 151.7848, |
|
"eval_samples_per_second": 37.263, |
|
"eval_steps_per_second": 4.658, |
|
"eval_wer": 0.3271813965431465, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 0.4292232096195221, |
|
"eval_runtime": 151.8596, |
|
"eval_samples_per_second": 37.245, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.324565486029754, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.6722184419631958, |
|
"learning_rate": 1.614285714285714e-05, |
|
"loss": 0.4131, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"eval_loss": 0.42547932267189026, |
|
"eval_runtime": 152.3556, |
|
"eval_samples_per_second": 37.124, |
|
"eval_steps_per_second": 4.64, |
|
"eval_wer": 0.32316926385389416, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"eval_loss": 0.4231884181499481, |
|
"eval_runtime": 151.3029, |
|
"eval_samples_per_second": 37.382, |
|
"eval_steps_per_second": 4.673, |
|
"eval_wer": 0.3216286049012213, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"step": 2600, |
|
"total_flos": 9.931787749443291e+18, |
|
"train_loss": 1.1254758776151217, |
|
"train_runtime": 6709.1009, |
|
"train_samples_per_second": 12.401, |
|
"train_steps_per_second": 0.388 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 9.931787749443291e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|