{ "best_metric": 0.3040466010570526, "best_model_checkpoint": "./enko_xlsr_13p_run1/checkpoint-77020", "epoch": 10.0, "eval_steps": 500, "global_step": 77020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 0.0003, "loss": 4.7797, "step": 500 }, { "epoch": 0.13, "learning_rate": 0.00029803972817564033, "loss": 1.2377, "step": 1000 }, { "epoch": 0.19, "learning_rate": 0.0002960794563512807, "loss": 0.9452, "step": 1500 }, { "epoch": 0.26, "learning_rate": 0.000294119184526921, "loss": 0.8622, "step": 2000 }, { "epoch": 0.32, "learning_rate": 0.0002921589127025614, "loss": 0.82, "step": 2500 }, { "epoch": 0.39, "learning_rate": 0.0002901986408782018, "loss": 0.7742, "step": 3000 }, { "epoch": 0.45, "learning_rate": 0.00028823836905384213, "loss": 0.75, "step": 3500 }, { "epoch": 0.52, "learning_rate": 0.0002862780972294825, "loss": 0.7459, "step": 4000 }, { "epoch": 0.58, "learning_rate": 0.00028431782540512285, "loss": 0.7282, "step": 4500 }, { "epoch": 0.65, "learning_rate": 0.00028235755358076316, "loss": 0.7104, "step": 5000 }, { "epoch": 0.71, "learning_rate": 0.0002803972817564035, "loss": 0.692, "step": 5500 }, { "epoch": 0.78, "learning_rate": 0.0002784370099320439, "loss": 0.6954, "step": 6000 }, { "epoch": 0.84, "learning_rate": 0.00027647673810768424, "loss": 0.6844, "step": 6500 }, { "epoch": 0.91, "learning_rate": 0.0002745164662833246, "loss": 0.6779, "step": 7000 }, { "epoch": 0.97, "learning_rate": 0.00027255619445896496, "loss": 0.6595, "step": 7500 }, { "epoch": 1.0, "eval_loss": 0.44951537251472473, "eval_runtime": 11.7967, "eval_samples_per_second": 60.271, "eval_steps_per_second": 7.544, "eval_wer": 0.297432521395655, "step": 7702 }, { "epoch": 1.04, "learning_rate": 0.0002705959226346053, "loss": 0.6426, "step": 8000 }, { "epoch": 1.1, "learning_rate": 0.0002686356508102457, "loss": 0.6215, "step": 8500 }, { "epoch": 1.17, "learning_rate": 0.00026667537898588604, "loss": 0.6209, "step": 9000 }, { "epoch": 1.23, "learning_rate": 0.0002647151071615264, "loss": 0.6134, "step": 9500 }, { "epoch": 1.3, "learning_rate": 0.00026275483533716676, "loss": 0.6127, "step": 10000 }, { "epoch": 1.36, "learning_rate": 0.00026079456351280707, "loss": 0.5987, "step": 10500 }, { "epoch": 1.43, "learning_rate": 0.0002588342916884474, "loss": 0.6028, "step": 11000 }, { "epoch": 1.49, "learning_rate": 0.0002568740198640878, "loss": 0.5893, "step": 11500 }, { "epoch": 1.56, "learning_rate": 0.00025491374803972815, "loss": 0.59, "step": 12000 }, { "epoch": 1.62, "learning_rate": 0.0002529534762153685, "loss": 0.594, "step": 12500 }, { "epoch": 1.69, "learning_rate": 0.00025099320439100887, "loss": 0.5903, "step": 13000 }, { "epoch": 1.75, "learning_rate": 0.0002490329325666492, "loss": 0.5779, "step": 13500 }, { "epoch": 1.82, "learning_rate": 0.0002470726607422896, "loss": 0.582, "step": 14000 }, { "epoch": 1.88, "learning_rate": 0.00024511238891792995, "loss": 0.5781, "step": 14500 }, { "epoch": 1.95, "learning_rate": 0.0002431521170935703, "loss": 0.5717, "step": 15000 }, { "epoch": 2.0, "eval_loss": 0.39816993474960327, "eval_runtime": 11.9909, "eval_samples_per_second": 59.295, "eval_steps_per_second": 7.422, "eval_wer": 0.25622119815668204, "step": 15404 }, { "epoch": 2.01, "learning_rate": 0.00024119184526921064, "loss": 0.5632, "step": 15500 }, { "epoch": 2.08, "learning_rate": 0.000239231573444851, "loss": 0.5298, "step": 16000 }, { "epoch": 2.14, "learning_rate": 0.00023727130162049136, "loss": 0.5391, "step": 16500 }, { "epoch": 2.21, "learning_rate": 0.00023531102979613172, "loss": 0.5274, "step": 17000 }, { "epoch": 2.27, "learning_rate": 0.00023335075797177208, "loss": 0.5368, "step": 17500 }, { "epoch": 2.34, "learning_rate": 0.00023139048614741244, "loss": 0.5191, "step": 18000 }, { "epoch": 2.4, "learning_rate": 0.00022943021432305277, "loss": 0.523, "step": 18500 }, { "epoch": 2.47, "learning_rate": 0.00022746994249869313, "loss": 0.529, "step": 19000 }, { "epoch": 2.53, "learning_rate": 0.0002255096706743335, "loss": 0.5293, "step": 19500 }, { "epoch": 2.6, "learning_rate": 0.00022354939884997385, "loss": 0.5091, "step": 20000 }, { "epoch": 2.66, "learning_rate": 0.0002215891270256142, "loss": 0.5258, "step": 20500 }, { "epoch": 2.73, "learning_rate": 0.00021962885520125454, "loss": 0.5107, "step": 21000 }, { "epoch": 2.79, "learning_rate": 0.0002176685833768949, "loss": 0.5197, "step": 21500 }, { "epoch": 2.86, "learning_rate": 0.00021570831155253526, "loss": 0.5162, "step": 22000 }, { "epoch": 2.92, "learning_rate": 0.00021374803972817562, "loss": 0.5162, "step": 22500 }, { "epoch": 2.99, "learning_rate": 0.00021178776790381598, "loss": 0.5134, "step": 23000 }, { "epoch": 3.0, "eval_loss": 0.3769395649433136, "eval_runtime": 12.4234, "eval_samples_per_second": 57.231, "eval_steps_per_second": 7.164, "eval_wer": 0.23647136273864383, "step": 23106 }, { "epoch": 3.05, "learning_rate": 0.00020982749607945634, "loss": 0.4797, "step": 23500 }, { "epoch": 3.12, "learning_rate": 0.00020786722425509668, "loss": 0.4777, "step": 24000 }, { "epoch": 3.18, "learning_rate": 0.00020590695243073704, "loss": 0.4729, "step": 24500 }, { "epoch": 3.25, "learning_rate": 0.0002039466806063774, "loss": 0.4749, "step": 25000 }, { "epoch": 3.31, "learning_rate": 0.00020198640878201776, "loss": 0.4803, "step": 25500 }, { "epoch": 3.38, "learning_rate": 0.00020002613695765812, "loss": 0.4738, "step": 26000 }, { "epoch": 3.44, "learning_rate": 0.00019806586513329845, "loss": 0.4782, "step": 26500 }, { "epoch": 3.51, "learning_rate": 0.0001961055933089388, "loss": 0.4994, "step": 27000 }, { "epoch": 3.57, "learning_rate": 0.00019414532148457917, "loss": 0.4676, "step": 27500 }, { "epoch": 3.64, "learning_rate": 0.00019218504966021953, "loss": 0.457, "step": 28000 }, { "epoch": 3.7, "learning_rate": 0.0001902247778358599, "loss": 0.469, "step": 28500 }, { "epoch": 3.77, "learning_rate": 0.00018826450601150025, "loss": 0.4573, "step": 29000 }, { "epoch": 3.83, "learning_rate": 0.00018630423418714058, "loss": 0.4672, "step": 29500 }, { "epoch": 3.9, "learning_rate": 0.00018434396236278094, "loss": 0.4736, "step": 30000 }, { "epoch": 3.96, "learning_rate": 0.0001823836905384213, "loss": 0.467, "step": 30500 }, { "epoch": 4.0, "eval_loss": 0.34988921880722046, "eval_runtime": 12.3506, "eval_samples_per_second": 57.568, "eval_steps_per_second": 7.206, "eval_wer": 0.22027649769585253, "step": 30808 }, { "epoch": 4.02, "learning_rate": 0.00018042341871406166, "loss": 0.4541, "step": 31000 }, { "epoch": 4.09, "learning_rate": 0.00017846314688970202, "loss": 0.4273, "step": 31500 }, { "epoch": 4.15, "learning_rate": 0.00017650287506534236, "loss": 0.4334, "step": 32000 }, { "epoch": 4.22, "learning_rate": 0.00017454260324098272, "loss": 0.4203, "step": 32500 }, { "epoch": 4.28, "learning_rate": 0.00017258233141662308, "loss": 0.4331, "step": 33000 }, { "epoch": 4.35, "learning_rate": 0.00017062205959226344, "loss": 0.4306, "step": 33500 }, { "epoch": 4.41, "learning_rate": 0.00016866178776790382, "loss": 0.4251, "step": 34000 }, { "epoch": 4.48, "learning_rate": 0.00016670151594354418, "loss": 0.4231, "step": 34500 }, { "epoch": 4.54, "learning_rate": 0.0001647412441191845, "loss": 0.4256, "step": 35000 }, { "epoch": 4.61, "learning_rate": 0.00016278097229482488, "loss": 0.4332, "step": 35500 }, { "epoch": 4.67, "learning_rate": 0.00016082070047046524, "loss": 0.4238, "step": 36000 }, { "epoch": 4.74, "learning_rate": 0.0001588604286461056, "loss": 0.4251, "step": 36500 }, { "epoch": 4.8, "learning_rate": 0.00015690015682174596, "loss": 0.425, "step": 37000 }, { "epoch": 4.87, "learning_rate": 0.0001549398849973863, "loss": 0.4208, "step": 37500 }, { "epoch": 4.93, "learning_rate": 0.00015297961317302665, "loss": 0.4245, "step": 38000 }, { "epoch": 5.0, "learning_rate": 0.000151019341348667, "loss": 0.4156, "step": 38500 }, { "epoch": 5.0, "eval_loss": 0.3391418159008026, "eval_runtime": 12.5768, "eval_samples_per_second": 56.533, "eval_steps_per_second": 7.077, "eval_wer": 0.21158657011191573, "step": 38510 }, { "epoch": 5.06, "learning_rate": 0.00014905906952430737, "loss": 0.3845, "step": 39000 }, { "epoch": 5.13, "learning_rate": 0.0001470987976999477, "loss": 0.393, "step": 39500 }, { "epoch": 5.19, "learning_rate": 0.00014513852587558806, "loss": 0.3938, "step": 40000 }, { "epoch": 5.26, "learning_rate": 0.00014317825405122842, "loss": 0.3902, "step": 40500 }, { "epoch": 5.32, "learning_rate": 0.00014121798222686878, "loss": 0.3817, "step": 41000 }, { "epoch": 5.39, "learning_rate": 0.00013925771040250914, "loss": 0.3937, "step": 41500 }, { "epoch": 5.45, "learning_rate": 0.0001372974385781495, "loss": 0.3821, "step": 42000 }, { "epoch": 5.52, "learning_rate": 0.00013533716675378984, "loss": 0.3903, "step": 42500 }, { "epoch": 5.58, "learning_rate": 0.0001333768949294302, "loss": 0.381, "step": 43000 }, { "epoch": 5.65, "learning_rate": 0.00013141662310507056, "loss": 0.3866, "step": 43500 }, { "epoch": 5.71, "learning_rate": 0.00012945635128071092, "loss": 0.3816, "step": 44000 }, { "epoch": 5.78, "learning_rate": 0.00012749607945635128, "loss": 0.3845, "step": 44500 }, { "epoch": 5.84, "learning_rate": 0.0001255358076319916, "loss": 0.3777, "step": 45000 }, { "epoch": 5.91, "learning_rate": 0.00012357553580763197, "loss": 0.3777, "step": 45500 }, { "epoch": 5.97, "learning_rate": 0.00012161526398327234, "loss": 0.379, "step": 46000 }, { "epoch": 6.0, "eval_loss": 0.3327275514602661, "eval_runtime": 12.31, "eval_samples_per_second": 57.758, "eval_steps_per_second": 7.23, "eval_wer": 0.1998683344305464, "step": 46212 }, { "epoch": 6.04, "learning_rate": 0.00011965499215891269, "loss": 0.3657, "step": 46500 }, { "epoch": 6.1, "learning_rate": 0.00011769472033455305, "loss": 0.3527, "step": 47000 }, { "epoch": 6.17, "learning_rate": 0.00011573444851019341, "loss": 0.3505, "step": 47500 }, { "epoch": 6.23, "learning_rate": 0.00011377417668583375, "loss": 0.3472, "step": 48000 }, { "epoch": 6.3, "learning_rate": 0.00011181390486147411, "loss": 0.3497, "step": 48500 }, { "epoch": 6.36, "learning_rate": 0.00010985363303711446, "loss": 0.349, "step": 49000 }, { "epoch": 6.43, "learning_rate": 0.00010789336121275482, "loss": 0.3545, "step": 49500 }, { "epoch": 6.49, "learning_rate": 0.00010593308938839518, "loss": 0.3453, "step": 50000 }, { "epoch": 6.56, "learning_rate": 0.00010397281756403553, "loss": 0.3501, "step": 50500 }, { "epoch": 6.62, "learning_rate": 0.00010201254573967589, "loss": 0.3473, "step": 51000 }, { "epoch": 6.69, "learning_rate": 0.00010005227391531626, "loss": 0.3462, "step": 51500 }, { "epoch": 6.75, "learning_rate": 9.80920020909566e-05, "loss": 0.3385, "step": 52000 }, { "epoch": 6.82, "learning_rate": 9.613173026659697e-05, "loss": 0.3459, "step": 52500 }, { "epoch": 6.88, "learning_rate": 9.417145844223731e-05, "loss": 0.3441, "step": 53000 }, { "epoch": 6.95, "learning_rate": 9.221118661787767e-05, "loss": 0.3475, "step": 53500 }, { "epoch": 7.0, "eval_loss": 0.31270119547843933, "eval_runtime": 12.4561, "eval_samples_per_second": 57.08, "eval_steps_per_second": 7.145, "eval_wer": 0.1947333772218565, "step": 53914 }, { "epoch": 7.01, "learning_rate": 9.025091479351803e-05, "loss": 0.3379, "step": 54000 }, { "epoch": 7.08, "learning_rate": 8.829064296915838e-05, "loss": 0.3159, "step": 54500 }, { "epoch": 7.14, "learning_rate": 8.633037114479874e-05, "loss": 0.3254, "step": 55000 }, { "epoch": 7.21, "learning_rate": 8.43700993204391e-05, "loss": 0.3195, "step": 55500 }, { "epoch": 7.27, "learning_rate": 8.240982749607945e-05, "loss": 0.3225, "step": 56000 }, { "epoch": 7.34, "learning_rate": 8.044955567171981e-05, "loss": 0.3128, "step": 56500 }, { "epoch": 7.4, "learning_rate": 7.848928384736017e-05, "loss": 0.318, "step": 57000 }, { "epoch": 7.47, "learning_rate": 7.652901202300051e-05, "loss": 0.3165, "step": 57500 }, { "epoch": 7.53, "learning_rate": 7.456874019864087e-05, "loss": 0.3191, "step": 58000 }, { "epoch": 7.6, "learning_rate": 7.260846837428123e-05, "loss": 0.3169, "step": 58500 }, { "epoch": 7.66, "learning_rate": 7.064819654992158e-05, "loss": 0.3118, "step": 59000 }, { "epoch": 7.73, "learning_rate": 6.868792472556194e-05, "loss": 0.3192, "step": 59500 }, { "epoch": 7.79, "learning_rate": 6.67276529012023e-05, "loss": 0.3055, "step": 60000 }, { "epoch": 7.86, "learning_rate": 6.476738107684265e-05, "loss": 0.3134, "step": 60500 }, { "epoch": 7.92, "learning_rate": 6.2807109252483e-05, "loss": 0.3081, "step": 61000 }, { "epoch": 7.98, "learning_rate": 6.084683742812336e-05, "loss": 0.3105, "step": 61500 }, { "epoch": 8.0, "eval_loss": 0.30813026428222656, "eval_runtime": 12.2002, "eval_samples_per_second": 58.278, "eval_steps_per_second": 7.295, "eval_wer": 0.1814351547070441, "step": 61616 }, { "epoch": 8.05, "learning_rate": 5.888656560376372e-05, "loss": 0.2986, "step": 62000 }, { "epoch": 8.11, "learning_rate": 5.692629377940407e-05, "loss": 0.2908, "step": 62500 }, { "epoch": 8.18, "learning_rate": 5.4966021955044426e-05, "loss": 0.2835, "step": 63000 }, { "epoch": 8.24, "learning_rate": 5.3005750130684786e-05, "loss": 0.2904, "step": 63500 }, { "epoch": 8.31, "learning_rate": 5.104547830632514e-05, "loss": 0.289, "step": 64000 }, { "epoch": 8.37, "learning_rate": 4.908520648196549e-05, "loss": 0.2905, "step": 64500 }, { "epoch": 8.44, "learning_rate": 4.7124934657605846e-05, "loss": 0.288, "step": 65000 }, { "epoch": 8.5, "learning_rate": 4.516466283324621e-05, "loss": 0.2813, "step": 65500 }, { "epoch": 8.57, "learning_rate": 4.3204391008886566e-05, "loss": 0.2831, "step": 66000 }, { "epoch": 8.63, "learning_rate": 4.124411918452692e-05, "loss": 0.2914, "step": 66500 }, { "epoch": 8.7, "learning_rate": 3.928384736016727e-05, "loss": 0.2807, "step": 67000 }, { "epoch": 8.76, "learning_rate": 3.7323575535807626e-05, "loss": 0.2833, "step": 67500 }, { "epoch": 8.83, "learning_rate": 3.5363303711447986e-05, "loss": 0.2825, "step": 68000 }, { "epoch": 8.89, "learning_rate": 3.340303188708834e-05, "loss": 0.2824, "step": 68500 }, { "epoch": 8.96, "learning_rate": 3.14427600627287e-05, "loss": 0.281, "step": 69000 }, { "epoch": 9.0, "eval_loss": 0.3067900538444519, "eval_runtime": 12.6252, "eval_samples_per_second": 56.316, "eval_steps_per_second": 7.049, "eval_wer": 0.17419354838709677, "step": 69318 }, { "epoch": 9.02, "learning_rate": 2.948248823836905e-05, "loss": 0.2725, "step": 69500 }, { "epoch": 9.09, "learning_rate": 2.7522216414009406e-05, "loss": 0.2696, "step": 70000 }, { "epoch": 9.15, "learning_rate": 2.5561944589649766e-05, "loss": 0.2713, "step": 70500 }, { "epoch": 9.22, "learning_rate": 2.360167276529012e-05, "loss": 0.2634, "step": 71000 }, { "epoch": 9.28, "learning_rate": 2.1641400940930475e-05, "loss": 0.2635, "step": 71500 }, { "epoch": 9.35, "learning_rate": 1.968112911657083e-05, "loss": 0.2658, "step": 72000 }, { "epoch": 9.41, "learning_rate": 1.7720857292211185e-05, "loss": 0.2648, "step": 72500 }, { "epoch": 9.48, "learning_rate": 1.576058546785154e-05, "loss": 0.273, "step": 73000 }, { "epoch": 9.54, "learning_rate": 1.3800313643491895e-05, "loss": 0.2601, "step": 73500 }, { "epoch": 9.61, "learning_rate": 1.1840041819132252e-05, "loss": 0.2598, "step": 74000 }, { "epoch": 9.67, "learning_rate": 9.879769994772608e-06, "loss": 0.2698, "step": 74500 }, { "epoch": 9.74, "learning_rate": 7.919498170412963e-06, "loss": 0.2574, "step": 75000 }, { "epoch": 9.8, "learning_rate": 5.959226346053318e-06, "loss": 0.2646, "step": 75500 }, { "epoch": 9.87, "learning_rate": 3.998954521693675e-06, "loss": 0.2584, "step": 76000 }, { "epoch": 9.93, "learning_rate": 2.0386826973340303e-06, "loss": 0.2623, "step": 76500 }, { "epoch": 10.0, "learning_rate": 7.841087297438577e-08, "loss": 0.2584, "step": 77000 }, { "epoch": 10.0, "eval_loss": 0.3040466010570526, "eval_runtime": 12.4236, "eval_samples_per_second": 57.23, "eval_steps_per_second": 7.164, "eval_wer": 0.17129690585911783, "step": 77020 }, { "epoch": 10.0, "step": 77020, "total_flos": 1.409382714117005e+20, "train_loss": 0.4655882824058503, "train_runtime": 41869.3879, "train_samples_per_second": 29.431, "train_steps_per_second": 1.84 } ], "logging_steps": 500, "max_steps": 77020, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.409382714117005e+20, "trial_name": null, "trial_params": null }