{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.6775106082036775, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 3.605041265487671, "eval_runtime": 151.6851, "eval_samples_per_second": 37.288, "eval_steps_per_second": 4.661, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 2.980067491531372, "eval_runtime": 149.8181, "eval_samples_per_second": 37.752, "eval_steps_per_second": 4.719, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 2.634444236755371, "eval_runtime": 149.2391, "eval_samples_per_second": 37.899, "eval_steps_per_second": 4.737, "eval_wer": 0.9829243632745422, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.1875977516174316, "eval_runtime": 149.4882, "eval_samples_per_second": 37.836, "eval_steps_per_second": 4.729, "eval_wer": 0.8078348927155719, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 1.7146128416061401, "learning_rate": 0.00029699999999999996, "loss": 3.377, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 1.006009578704834, "eval_runtime": 150.0094, "eval_samples_per_second": 37.704, "eval_steps_per_second": 4.713, "eval_wer": 0.7693826130217779, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.8393504619598389, "eval_runtime": 150.017, "eval_samples_per_second": 37.702, "eval_steps_per_second": 4.713, "eval_wer": 0.6380895828986857, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.7753380537033081, "eval_runtime": 150.3627, "eval_samples_per_second": 37.616, "eval_steps_per_second": 4.702, "eval_wer": 0.5870552550913963, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6410965919494629, "eval_runtime": 150.6587, "eval_samples_per_second": 37.542, "eval_steps_per_second": 4.693, "eval_wer": 0.4923207780327711, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.632188618183136, "eval_runtime": 150.208, "eval_samples_per_second": 37.654, "eval_steps_per_second": 4.707, "eval_wer": 0.5022869156328739, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 0.9841262102127075, "learning_rate": 0.00022928571428571426, "loss": 0.8318, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.5857909321784973, "eval_runtime": 151.3029, "eval_samples_per_second": 37.382, "eval_steps_per_second": 4.673, "eval_wer": 0.4563720691370705, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5580226182937622, "eval_runtime": 150.4331, "eval_samples_per_second": 37.598, "eval_steps_per_second": 4.7, "eval_wer": 0.43342267015454733, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.5395969748497009, "eval_runtime": 152.4347, "eval_samples_per_second": 37.104, "eval_steps_per_second": 4.638, "eval_wer": 0.4204073117106129, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.5091754794120789, "eval_runtime": 150.7597, "eval_samples_per_second": 37.517, "eval_steps_per_second": 4.69, "eval_wer": 0.4033316749851551, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.4941596984863281, "eval_runtime": 150.8584, "eval_samples_per_second": 37.492, "eval_steps_per_second": 4.687, "eval_wer": 0.39033236507197766, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 0.8058004379272461, "learning_rate": 0.0001582857142857143, "loss": 0.6486, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.4773481488227844, "eval_runtime": 151.2881, "eval_samples_per_second": 37.386, "eval_steps_per_second": 4.673, "eval_wer": 0.37783055961226747, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.472769170999527, "eval_runtime": 152.6271, "eval_samples_per_second": 37.058, "eval_steps_per_second": 4.632, "eval_wer": 0.3649756864759031, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.46484073996543884, "eval_runtime": 150.7659, "eval_samples_per_second": 37.515, "eval_steps_per_second": 4.689, "eval_wer": 0.3623276789009966, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.4558813273906708, "eval_runtime": 151.5484, "eval_samples_per_second": 37.321, "eval_steps_per_second": 4.665, "eval_wer": 0.3528429972236042, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.4480243921279907, "eval_runtime": 151.9127, "eval_samples_per_second": 37.232, "eval_steps_per_second": 4.654, "eval_wer": 0.3526825119160341, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.6075835227966309, "learning_rate": 8.728571428571428e-05, "loss": 0.5049, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.438320130109787, "eval_runtime": 151.7706, "eval_samples_per_second": 37.267, "eval_steps_per_second": 4.658, "eval_wer": 0.3383511739500249, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.4345419108867645, "eval_runtime": 151.6805, "eval_samples_per_second": 37.289, "eval_steps_per_second": 4.661, "eval_wer": 0.33549453547527724, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.4344736337661743, "eval_runtime": 151.8533, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.656, "eval_wer": 0.3300219864871371, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.42980891466140747, "eval_runtime": 151.7848, "eval_samples_per_second": 37.263, "eval_steps_per_second": 4.658, "eval_wer": 0.3271813965431465, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.4292232096195221, "eval_runtime": 151.8596, "eval_samples_per_second": 37.245, "eval_steps_per_second": 4.656, "eval_wer": 0.324565486029754, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.6722184419631958, "learning_rate": 1.614285714285714e-05, "loss": 0.4131, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.42547932267189026, "eval_runtime": 152.3556, "eval_samples_per_second": 37.124, "eval_steps_per_second": 4.64, "eval_wer": 0.32316926385389416, "step": 2500 }, { "epoch": 3.6775106082036775, "eval_loss": 0.4231884181499481, "eval_runtime": 151.3029, "eval_samples_per_second": 37.382, "eval_steps_per_second": 4.673, "eval_wer": 0.3216286049012213, "step": 2600 }, { "epoch": 3.6775106082036775, "step": 2600, "total_flos": 9.931787749443291e+18, "train_loss": 1.1254758776151217, "train_runtime": 6709.1009, "train_samples_per_second": 12.401, "train_steps_per_second": 0.388 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 9.931787749443291e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }