diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7520 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 193.21963394342762, + "global_step": 464500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "learning_rate": 1.997920133111481e-05, + "loss": 3.456, + "step": 500 + }, + { + "epoch": 0.42, + "learning_rate": 1.995840266222962e-05, + "loss": 1.8719, + "step": 1000 + }, + { + "epoch": 0.62, + "learning_rate": 1.9937603993344426e-05, + "loss": 1.7095, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 1.9916805324459236e-05, + "loss": 1.6155, + "step": 2000 + }, + { + "epoch": 1.0, + "eval_cer": 0.8924, + "eval_gen_len": 19.884, + "eval_loss": 1.4984486103057861, + "eval_runtime": 548.3383, + "eval_samples_per_second": 5.847, + "eval_steps_per_second": 1.463, + "step": 2404 + }, + { + "epoch": 1.04, + "learning_rate": 1.9896006655574045e-05, + "loss": 1.5555, + "step": 2500 + }, + { + "epoch": 1.25, + "learning_rate": 1.9875207986688854e-05, + "loss": 1.4646, + "step": 3000 + }, + { + "epoch": 1.46, + "learning_rate": 1.9854409317803664e-05, + "loss": 1.4079, + "step": 3500 + }, + { + "epoch": 1.66, + "learning_rate": 1.983361064891847e-05, + "loss": 1.3488, + "step": 4000 + }, + { + "epoch": 1.87, + "learning_rate": 1.981281198003328e-05, + "loss": 1.3039, + "step": 4500 + }, + { + "epoch": 2.0, + "eval_cer": 0.5947, + "eval_gen_len": 15.5827, + "eval_loss": 1.144504427909851, + "eval_runtime": 282.2519, + "eval_samples_per_second": 11.359, + "eval_steps_per_second": 2.841, + "step": 4808 + }, + { + "epoch": 2.08, + "learning_rate": 1.979201331114809e-05, + "loss": 1.2269, + "step": 5000 + }, + { + "epoch": 2.29, + "learning_rate": 1.9771214642262898e-05, + "loss": 1.1561, + "step": 5500 + }, + { + "epoch": 2.5, + "learning_rate": 1.9750415973377707e-05, + "loss": 1.1288, + "step": 6000 + }, + { + "epoch": 2.7, + "learning_rate": 1.9729700499168054e-05, + "loss": 1.0835, + "step": 6500 + }, + { + "epoch": 2.91, + "learning_rate": 1.9708901830282864e-05, + "loss": 1.0647, + "step": 7000 + }, + { + "epoch": 3.0, + "eval_cer": 0.4993, + "eval_gen_len": 14.9208, + "eval_loss": 0.9073113799095154, + "eval_runtime": 267.439, + "eval_samples_per_second": 11.988, + "eval_steps_per_second": 2.999, + "step": 7212 + }, + { + "epoch": 3.12, + "learning_rate": 1.9688103161397673e-05, + "loss": 0.9848, + "step": 7500 + }, + { + "epoch": 3.33, + "learning_rate": 1.966730449251248e-05, + "loss": 0.9265, + "step": 8000 + }, + { + "epoch": 3.54, + "learning_rate": 1.964650582362729e-05, + "loss": 0.9057, + "step": 8500 + }, + { + "epoch": 3.74, + "learning_rate": 1.9625707154742098e-05, + "loss": 0.8799, + "step": 9000 + }, + { + "epoch": 3.95, + "learning_rate": 1.9604950083194678e-05, + "loss": 0.8491, + "step": 9500 + }, + { + "epoch": 4.0, + "eval_cer": 0.4462, + "eval_gen_len": 13.549, + "eval_loss": 0.7219734191894531, + "eval_runtime": 263.8796, + "eval_samples_per_second": 12.149, + "eval_steps_per_second": 3.039, + "step": 9616 + }, + { + "epoch": 4.16, + "learning_rate": 1.9584151414309487e-05, + "loss": 0.7793, + "step": 10000 + }, + { + "epoch": 4.37, + "learning_rate": 1.9563352745424296e-05, + "loss": 0.7483, + "step": 10500 + }, + { + "epoch": 4.58, + "learning_rate": 1.9542554076539106e-05, + "loss": 0.732, + "step": 11000 + }, + { + "epoch": 4.78, + "learning_rate": 1.9521755407653912e-05, + "loss": 0.7186, + "step": 11500 + }, + { + "epoch": 4.99, + "learning_rate": 1.9500956738768718e-05, + "loss": 0.7024, + "step": 12000 + }, + { + "epoch": 5.0, + "eval_cer": 0.4716, + "eval_gen_len": 15.7813, + "eval_loss": 0.6081481575965881, + "eval_runtime": 336.6018, + "eval_samples_per_second": 9.525, + "eval_steps_per_second": 2.383, + "step": 12020 + }, + { + "epoch": 5.2, + "learning_rate": 1.9480158069883527e-05, + "loss": 0.6114, + "step": 12500 + }, + { + "epoch": 5.41, + "learning_rate": 1.945940099833611e-05, + "loss": 0.6193, + "step": 13000 + }, + { + "epoch": 5.62, + "learning_rate": 1.9438602329450916e-05, + "loss": 0.6114, + "step": 13500 + }, + { + "epoch": 5.82, + "learning_rate": 1.9417803660565726e-05, + "loss": 0.5797, + "step": 14000 + }, + { + "epoch": 6.0, + "eval_cer": 0.3726, + "eval_gen_len": 13.8163, + "eval_loss": 0.5052656531333923, + "eval_runtime": 259.2943, + "eval_samples_per_second": 12.364, + "eval_steps_per_second": 3.093, + "step": 14424 + }, + { + "epoch": 6.03, + "learning_rate": 1.9397004991680535e-05, + "loss": 0.5658, + "step": 14500 + }, + { + "epoch": 6.24, + "learning_rate": 1.9376206322795344e-05, + "loss": 0.52, + "step": 15000 + }, + { + "epoch": 6.45, + "learning_rate": 1.9355407653910154e-05, + "loss": 0.4992, + "step": 15500 + }, + { + "epoch": 6.66, + "learning_rate": 1.933460898502496e-05, + "loss": 0.5095, + "step": 16000 + }, + { + "epoch": 6.86, + "learning_rate": 1.931381031613977e-05, + "loss": 0.4966, + "step": 16500 + }, + { + "epoch": 7.0, + "eval_cer": 0.3997, + "eval_gen_len": 15.0505, + "eval_loss": 0.4698517620563507, + "eval_runtime": 299.617, + "eval_samples_per_second": 10.7, + "eval_steps_per_second": 2.677, + "step": 16828 + }, + { + "epoch": 7.07, + "learning_rate": 1.929305324459235e-05, + "loss": 0.4493, + "step": 17000 + }, + { + "epoch": 7.28, + "learning_rate": 1.9272254575707155e-05, + "loss": 0.4288, + "step": 17500 + }, + { + "epoch": 7.49, + "learning_rate": 1.9251455906821964e-05, + "loss": 0.4397, + "step": 18000 + }, + { + "epoch": 7.7, + "learning_rate": 1.9230657237936774e-05, + "loss": 0.423, + "step": 18500 + }, + { + "epoch": 7.9, + "learning_rate": 1.9209900166389354e-05, + "loss": 0.4234, + "step": 19000 + }, + { + "epoch": 8.0, + "eval_cer": 0.3414, + "eval_gen_len": 14.2976, + "eval_loss": 0.41973158717155457, + "eval_runtime": 277.9117, + "eval_samples_per_second": 11.536, + "eval_steps_per_second": 2.886, + "step": 19232 + }, + { + "epoch": 8.11, + "learning_rate": 1.918910149750416e-05, + "loss": 0.377, + "step": 19500 + }, + { + "epoch": 8.32, + "learning_rate": 1.916834442595674e-05, + "loss": 0.3612, + "step": 20000 + }, + { + "epoch": 8.53, + "learning_rate": 1.914754575707155e-05, + "loss": 0.3725, + "step": 20500 + }, + { + "epoch": 8.74, + "learning_rate": 1.9126747088186358e-05, + "loss": 0.3607, + "step": 21000 + }, + { + "epoch": 8.94, + "learning_rate": 1.9105948419301164e-05, + "loss": 0.3661, + "step": 21500 + }, + { + "epoch": 9.0, + "eval_cer": 0.3568, + "eval_gen_len": 14.5349, + "eval_loss": 0.36951112747192383, + "eval_runtime": 287.7236, + "eval_samples_per_second": 11.143, + "eval_steps_per_second": 2.787, + "step": 21636 + }, + { + "epoch": 9.15, + "learning_rate": 1.9085149750415974e-05, + "loss": 0.3287, + "step": 22000 + }, + { + "epoch": 9.36, + "learning_rate": 1.9064351081530783e-05, + "loss": 0.3185, + "step": 22500 + }, + { + "epoch": 9.57, + "learning_rate": 1.9043552412645592e-05, + "loss": 0.3082, + "step": 23000 + }, + { + "epoch": 9.78, + "learning_rate": 1.90227537437604e-05, + "loss": 0.3177, + "step": 23500 + }, + { + "epoch": 9.98, + "learning_rate": 1.9001996672212978e-05, + "loss": 0.3094, + "step": 24000 + }, + { + "epoch": 10.0, + "eval_cer": 0.3123, + "eval_gen_len": 13.6931, + "eval_loss": 0.35325565934181213, + "eval_runtime": 287.7264, + "eval_samples_per_second": 11.143, + "eval_steps_per_second": 2.787, + "step": 24040 + }, + { + "epoch": 10.19, + "learning_rate": 1.8981198003327787e-05, + "loss": 0.2623, + "step": 24500 + }, + { + "epoch": 10.4, + "learning_rate": 1.8960399334442597e-05, + "loss": 0.2666, + "step": 25000 + }, + { + "epoch": 10.61, + "learning_rate": 1.8939600665557406e-05, + "loss": 0.2667, + "step": 25500 + }, + { + "epoch": 10.82, + "learning_rate": 1.8918843594009986e-05, + "loss": 0.2695, + "step": 26000 + }, + { + "epoch": 11.0, + "eval_cer": 0.3036, + "eval_gen_len": 13.8085, + "eval_loss": 0.34151870012283325, + "eval_runtime": 267.5842, + "eval_samples_per_second": 11.981, + "eval_steps_per_second": 2.997, + "step": 26444 + }, + { + "epoch": 11.02, + "learning_rate": 1.8898086522462566e-05, + "loss": 0.2818, + "step": 26500 + }, + { + "epoch": 11.23, + "learning_rate": 1.8877287853577372e-05, + "loss": 0.24, + "step": 27000 + }, + { + "epoch": 11.44, + "learning_rate": 1.885648918469218e-05, + "loss": 0.2347, + "step": 27500 + }, + { + "epoch": 11.65, + "learning_rate": 1.883569051580699e-05, + "loss": 0.2372, + "step": 28000 + }, + { + "epoch": 11.86, + "learning_rate": 1.88148918469218e-05, + "loss": 0.242, + "step": 28500 + }, + { + "epoch": 12.0, + "eval_cer": 0.2954, + "eval_gen_len": 13.5817, + "eval_loss": 0.33137527108192444, + "eval_runtime": 249.8381, + "eval_samples_per_second": 12.832, + "eval_steps_per_second": 3.21, + "step": 28848 + }, + { + "epoch": 12.06, + "learning_rate": 1.8794093178036606e-05, + "loss": 0.2294, + "step": 29000 + }, + { + "epoch": 12.27, + "learning_rate": 1.8773336106489186e-05, + "loss": 0.2139, + "step": 29500 + }, + { + "epoch": 12.48, + "learning_rate": 1.8752537437603995e-05, + "loss": 0.2173, + "step": 30000 + }, + { + "epoch": 12.69, + "learning_rate": 1.8731738768718805e-05, + "loss": 0.214, + "step": 30500 + }, + { + "epoch": 12.9, + "learning_rate": 1.871094009983361e-05, + "loss": 0.2123, + "step": 31000 + }, + { + "epoch": 13.0, + "eval_cer": 0.2927, + "eval_gen_len": 13.8079, + "eval_loss": 0.3195803463459015, + "eval_runtime": 264.0497, + "eval_samples_per_second": 12.142, + "eval_steps_per_second": 3.037, + "step": 31252 + }, + { + "epoch": 13.1, + "learning_rate": 1.869014143094842e-05, + "loss": 0.191, + "step": 31500 + }, + { + "epoch": 13.31, + "learning_rate": 1.866934276206323e-05, + "loss": 0.187, + "step": 32000 + }, + { + "epoch": 13.52, + "learning_rate": 1.864854409317804e-05, + "loss": 0.1903, + "step": 32500 + }, + { + "epoch": 13.73, + "learning_rate": 1.8627745424292848e-05, + "loss": 0.1978, + "step": 33000 + }, + { + "epoch": 13.94, + "learning_rate": 1.8606946755407654e-05, + "loss": 0.1954, + "step": 33500 + }, + { + "epoch": 14.0, + "eval_cer": 0.2802, + "eval_gen_len": 13.5215, + "eval_loss": 0.3065420985221863, + "eval_runtime": 255.1945, + "eval_samples_per_second": 12.563, + "eval_steps_per_second": 3.143, + "step": 33656 + }, + { + "epoch": 14.14, + "learning_rate": 1.8586231281198004e-05, + "loss": 0.1658, + "step": 34000 + }, + { + "epoch": 14.35, + "learning_rate": 1.8565432612312814e-05, + "loss": 0.165, + "step": 34500 + }, + { + "epoch": 14.56, + "learning_rate": 1.854463394342762e-05, + "loss": 0.1673, + "step": 35000 + }, + { + "epoch": 14.77, + "learning_rate": 1.852383527454243e-05, + "loss": 0.174, + "step": 35500 + }, + { + "epoch": 14.98, + "learning_rate": 1.850303660565724e-05, + "loss": 0.1734, + "step": 36000 + }, + { + "epoch": 15.0, + "eval_cer": 0.301, + "eval_gen_len": 14.1818, + "eval_loss": 0.3554905652999878, + "eval_runtime": 265.2219, + "eval_samples_per_second": 12.088, + "eval_steps_per_second": 3.024, + "step": 36060 + }, + { + "epoch": 15.18, + "learning_rate": 1.8482237936772048e-05, + "loss": 0.1436, + "step": 36500 + }, + { + "epoch": 15.39, + "learning_rate": 1.8461439267886857e-05, + "loss": 0.1566, + "step": 37000 + }, + { + "epoch": 15.6, + "learning_rate": 1.8440640599001663e-05, + "loss": 0.1548, + "step": 37500 + }, + { + "epoch": 15.81, + "learning_rate": 1.8419883527454246e-05, + "loss": 0.1598, + "step": 38000 + }, + { + "epoch": 16.0, + "eval_cer": 0.2854, + "eval_gen_len": 13.975, + "eval_loss": 0.3254939913749695, + "eval_runtime": 272.7931, + "eval_samples_per_second": 11.752, + "eval_steps_per_second": 2.94, + "step": 38464 + }, + { + "epoch": 16.01, + "learning_rate": 1.8399084858569052e-05, + "loss": 0.1543, + "step": 38500 + }, + { + "epoch": 16.22, + "learning_rate": 1.8378286189683862e-05, + "loss": 0.136, + "step": 39000 + }, + { + "epoch": 16.43, + "learning_rate": 1.835748752079867e-05, + "loss": 0.1356, + "step": 39500 + }, + { + "epoch": 16.64, + "learning_rate": 1.833673044925125e-05, + "loss": 0.14, + "step": 40000 + }, + { + "epoch": 16.85, + "learning_rate": 1.8315931780366057e-05, + "loss": 0.1442, + "step": 40500 + }, + { + "epoch": 17.0, + "eval_cer": 0.2748, + "eval_gen_len": 13.4192, + "eval_loss": 0.3075180649757385, + "eval_runtime": 259.5551, + "eval_samples_per_second": 12.352, + "eval_steps_per_second": 3.09, + "step": 40868 + }, + { + "epoch": 17.05, + "learning_rate": 1.8295133111480866e-05, + "loss": 0.1417, + "step": 41000 + }, + { + "epoch": 17.26, + "learning_rate": 1.8274334442595676e-05, + "loss": 0.1241, + "step": 41500 + }, + { + "epoch": 17.47, + "learning_rate": 1.8253577371048256e-05, + "loss": 0.1254, + "step": 42000 + }, + { + "epoch": 17.68, + "learning_rate": 1.823277870216306e-05, + "loss": 0.1315, + "step": 42500 + }, + { + "epoch": 17.89, + "learning_rate": 1.821198003327787e-05, + "loss": 0.1288, + "step": 43000 + }, + { + "epoch": 18.0, + "eval_cer": 0.2763, + "eval_gen_len": 13.5976, + "eval_loss": 0.30095288157463074, + "eval_runtime": 261.6386, + "eval_samples_per_second": 12.254, + "eval_steps_per_second": 3.065, + "step": 43272 + }, + { + "epoch": 18.09, + "learning_rate": 1.819118136439268e-05, + "loss": 0.123, + "step": 43500 + }, + { + "epoch": 18.3, + "learning_rate": 1.817042429284526e-05, + "loss": 0.1178, + "step": 44000 + }, + { + "epoch": 18.51, + "learning_rate": 1.8149625623960066e-05, + "loss": 0.1203, + "step": 44500 + }, + { + "epoch": 18.72, + "learning_rate": 1.8128826955074876e-05, + "loss": 0.1206, + "step": 45000 + }, + { + "epoch": 18.93, + "learning_rate": 1.8108028286189685e-05, + "loss": 0.1249, + "step": 45500 + }, + { + "epoch": 19.0, + "eval_cer": 0.2781, + "eval_gen_len": 13.9969, + "eval_loss": 0.3196047246456146, + "eval_runtime": 267.3702, + "eval_samples_per_second": 11.991, + "eval_steps_per_second": 3.0, + "step": 45676 + }, + { + "epoch": 19.13, + "learning_rate": 1.8087271214642265e-05, + "loss": 0.1067, + "step": 46000 + }, + { + "epoch": 19.34, + "learning_rate": 1.8066514143094845e-05, + "loss": 0.106, + "step": 46500 + }, + { + "epoch": 19.55, + "learning_rate": 1.8045715474209654e-05, + "loss": 0.1083, + "step": 47000 + }, + { + "epoch": 19.76, + "learning_rate": 1.802491680532446e-05, + "loss": 0.1098, + "step": 47500 + }, + { + "epoch": 19.97, + "learning_rate": 1.800411813643927e-05, + "loss": 0.1182, + "step": 48000 + }, + { + "epoch": 20.0, + "eval_cer": 0.2783, + "eval_gen_len": 13.6728, + "eval_loss": 0.3209761679172516, + "eval_runtime": 262.3576, + "eval_samples_per_second": 12.22, + "eval_steps_per_second": 3.057, + "step": 48080 + }, + { + "epoch": 20.17, + "learning_rate": 1.798331946755408e-05, + "loss": 0.0975, + "step": 48500 + }, + { + "epoch": 20.38, + "learning_rate": 1.7962520798668888e-05, + "loss": 0.0975, + "step": 49000 + }, + { + "epoch": 20.59, + "learning_rate": 1.7941722129783698e-05, + "loss": 0.1024, + "step": 49500 + }, + { + "epoch": 20.8, + "learning_rate": 1.7920923460898504e-05, + "loss": 0.1056, + "step": 50000 + }, + { + "epoch": 21.0, + "eval_cer": 0.2707, + "eval_gen_len": 13.5714, + "eval_loss": 0.3157811760902405, + "eval_runtime": 260.7596, + "eval_samples_per_second": 12.295, + "eval_steps_per_second": 3.076, + "step": 50484 + }, + { + "epoch": 21.01, + "learning_rate": 1.7900166389351083e-05, + "loss": 0.1029, + "step": 50500 + }, + { + "epoch": 21.21, + "learning_rate": 1.7879367720465893e-05, + "loss": 0.0943, + "step": 51000 + }, + { + "epoch": 21.42, + "learning_rate": 1.7858569051580702e-05, + "loss": 0.0929, + "step": 51500 + }, + { + "epoch": 21.63, + "learning_rate": 1.7837770382695508e-05, + "loss": 0.0965, + "step": 52000 + }, + { + "epoch": 21.84, + "learning_rate": 1.7816971713810317e-05, + "loss": 0.0916, + "step": 52500 + }, + { + "epoch": 22.0, + "eval_cer": 0.2685, + "eval_gen_len": 13.6404, + "eval_loss": 0.315157949924469, + "eval_runtime": 259.3506, + "eval_samples_per_second": 12.362, + "eval_steps_per_second": 3.092, + "step": 52888 + }, + { + "epoch": 22.05, + "learning_rate": 1.7796173044925127e-05, + "loss": 0.0992, + "step": 53000 + }, + { + "epoch": 22.25, + "learning_rate": 1.7775415973377707e-05, + "loss": 0.0884, + "step": 53500 + }, + { + "epoch": 22.46, + "learning_rate": 1.7754617304492513e-05, + "loss": 0.0922, + "step": 54000 + }, + { + "epoch": 22.67, + "learning_rate": 1.7733818635607322e-05, + "loss": 0.0826, + "step": 54500 + }, + { + "epoch": 22.88, + "learning_rate": 1.771301996672213e-05, + "loss": 0.0934, + "step": 55000 + }, + { + "epoch": 23.0, + "eval_cer": 0.2711, + "eval_gen_len": 13.8575, + "eval_loss": 0.3333517909049988, + "eval_runtime": 261.6177, + "eval_samples_per_second": 12.255, + "eval_steps_per_second": 3.066, + "step": 55292 + }, + { + "epoch": 23.09, + "learning_rate": 1.769222129783694e-05, + "loss": 0.0861, + "step": 55500 + }, + { + "epoch": 23.29, + "learning_rate": 1.7671464226289517e-05, + "loss": 0.081, + "step": 56000 + }, + { + "epoch": 23.5, + "learning_rate": 1.7650665557404327e-05, + "loss": 0.0798, + "step": 56500 + }, + { + "epoch": 23.71, + "learning_rate": 1.7629866888519136e-05, + "loss": 0.0843, + "step": 57000 + }, + { + "epoch": 23.92, + "learning_rate": 1.7609068219633945e-05, + "loss": 0.0941, + "step": 57500 + }, + { + "epoch": 24.0, + "eval_cer": 0.2656, + "eval_gen_len": 13.5583, + "eval_loss": 0.3143361210823059, + "eval_runtime": 261.1394, + "eval_samples_per_second": 12.277, + "eval_steps_per_second": 3.071, + "step": 57696 + }, + { + "epoch": 24.13, + "learning_rate": 1.7588269550748755e-05, + "loss": 0.0809, + "step": 58000 + }, + { + "epoch": 24.33, + "learning_rate": 1.756747088186356e-05, + "loss": 0.0784, + "step": 58500 + }, + { + "epoch": 24.54, + "learning_rate": 1.754671381031614e-05, + "loss": 0.0785, + "step": 59000 + }, + { + "epoch": 24.75, + "learning_rate": 1.752591514143095e-05, + "loss": 0.0782, + "step": 59500 + }, + { + "epoch": 24.96, + "learning_rate": 1.750511647254576e-05, + "loss": 0.0773, + "step": 60000 + }, + { + "epoch": 25.0, + "eval_cer": 0.2641, + "eval_gen_len": 13.5561, + "eval_loss": 0.3231368660926819, + "eval_runtime": 255.8084, + "eval_samples_per_second": 12.533, + "eval_steps_per_second": 3.135, + "step": 60100 + }, + { + "epoch": 25.17, + "learning_rate": 1.7484317803660565e-05, + "loss": 0.0744, + "step": 60500 + }, + { + "epoch": 25.37, + "learning_rate": 1.7463519134775375e-05, + "loss": 0.0719, + "step": 61000 + }, + { + "epoch": 25.58, + "learning_rate": 1.7442762063227955e-05, + "loss": 0.0821, + "step": 61500 + }, + { + "epoch": 25.79, + "learning_rate": 1.7421963394342764e-05, + "loss": 0.0781, + "step": 62000 + }, + { + "epoch": 26.0, + "learning_rate": 1.740116472545757e-05, + "loss": 0.0759, + "step": 62500 + }, + { + "epoch": 26.0, + "eval_cer": 0.2668, + "eval_gen_len": 13.7564, + "eval_loss": 0.3243275582790375, + "eval_runtime": 262.8593, + "eval_samples_per_second": 12.197, + "eval_steps_per_second": 3.051, + "step": 62504 + }, + { + "epoch": 26.21, + "learning_rate": 1.738036605657238e-05, + "loss": 0.0664, + "step": 63000 + }, + { + "epoch": 26.41, + "learning_rate": 1.735956738768719e-05, + "loss": 0.0683, + "step": 63500 + }, + { + "epoch": 26.62, + "learning_rate": 1.733881031613977e-05, + "loss": 0.0712, + "step": 64000 + }, + { + "epoch": 26.83, + "learning_rate": 1.7318011647254578e-05, + "loss": 0.077, + "step": 64500 + }, + { + "epoch": 27.0, + "eval_cer": 0.267, + "eval_gen_len": 13.7558, + "eval_loss": 0.33405444025993347, + "eval_runtime": 259.6084, + "eval_samples_per_second": 12.349, + "eval_steps_per_second": 3.089, + "step": 64908 + }, + { + "epoch": 27.04, + "learning_rate": 1.7297212978369387e-05, + "loss": 0.076, + "step": 65000 + }, + { + "epoch": 27.25, + "learning_rate": 1.7276414309484197e-05, + "loss": 0.0595, + "step": 65500 + }, + { + "epoch": 27.45, + "learning_rate": 1.7255615640599003e-05, + "loss": 0.0608, + "step": 66000 + }, + { + "epoch": 27.66, + "learning_rate": 1.7234858569051583e-05, + "loss": 0.0705, + "step": 66500 + }, + { + "epoch": 27.87, + "learning_rate": 1.7214059900166392e-05, + "loss": 0.0743, + "step": 67000 + }, + { + "epoch": 28.0, + "eval_cer": 0.2796, + "eval_gen_len": 14.1085, + "eval_loss": 0.3349040448665619, + "eval_runtime": 282.8059, + "eval_samples_per_second": 11.336, + "eval_steps_per_second": 2.836, + "step": 67312 + }, + { + "epoch": 28.08, + "learning_rate": 1.71932612312812e-05, + "loss": 0.0674, + "step": 67500 + }, + { + "epoch": 28.29, + "learning_rate": 1.7172462562396007e-05, + "loss": 0.0621, + "step": 68000 + }, + { + "epoch": 28.49, + "learning_rate": 1.7151663893510817e-05, + "loss": 0.0661, + "step": 68500 + }, + { + "epoch": 28.7, + "learning_rate": 1.7130865224625626e-05, + "loss": 0.0611, + "step": 69000 + }, + { + "epoch": 28.91, + "learning_rate": 1.7110108153078206e-05, + "loss": 0.0662, + "step": 69500 + }, + { + "epoch": 29.0, + "eval_cer": 0.2623, + "eval_gen_len": 13.5396, + "eval_loss": 0.3251936137676239, + "eval_runtime": 258.4167, + "eval_samples_per_second": 12.406, + "eval_steps_per_second": 3.104, + "step": 69716 + }, + { + "epoch": 29.12, + "learning_rate": 1.7089309484193012e-05, + "loss": 0.0652, + "step": 70000 + }, + { + "epoch": 29.33, + "learning_rate": 1.706851081530782e-05, + "loss": 0.0557, + "step": 70500 + }, + { + "epoch": 29.53, + "learning_rate": 1.704771214642263e-05, + "loss": 0.0584, + "step": 71000 + }, + { + "epoch": 29.74, + "learning_rate": 1.702691347753744e-05, + "loss": 0.0627, + "step": 71500 + }, + { + "epoch": 29.95, + "learning_rate": 1.700611480865225e-05, + "loss": 0.0685, + "step": 72000 + }, + { + "epoch": 30.0, + "eval_cer": 0.2643, + "eval_gen_len": 13.6528, + "eval_loss": 0.33093786239624023, + "eval_runtime": 269.9487, + "eval_samples_per_second": 11.876, + "eval_steps_per_second": 2.971, + "step": 72120 + }, + { + "epoch": 30.16, + "learning_rate": 1.6985357737104826e-05, + "loss": 0.0592, + "step": 72500 + }, + { + "epoch": 30.37, + "learning_rate": 1.6964559068219635e-05, + "loss": 0.0564, + "step": 73000 + }, + { + "epoch": 30.57, + "learning_rate": 1.6943760399334445e-05, + "loss": 0.0623, + "step": 73500 + }, + { + "epoch": 30.78, + "learning_rate": 1.692296173044925e-05, + "loss": 0.0599, + "step": 74000 + }, + { + "epoch": 30.99, + "learning_rate": 1.690220465890183e-05, + "loss": 0.0619, + "step": 74500 + }, + { + "epoch": 31.0, + "eval_cer": 0.266, + "eval_gen_len": 13.7171, + "eval_loss": 0.3532868027687073, + "eval_runtime": 257.8089, + "eval_samples_per_second": 12.436, + "eval_steps_per_second": 3.111, + "step": 74524 + }, + { + "epoch": 31.2, + "learning_rate": 1.688140599001664e-05, + "loss": 0.0499, + "step": 75000 + }, + { + "epoch": 31.41, + "learning_rate": 1.686060732113145e-05, + "loss": 0.0592, + "step": 75500 + }, + { + "epoch": 31.61, + "learning_rate": 1.6839808652246255e-05, + "loss": 0.0555, + "step": 76000 + }, + { + "epoch": 31.82, + "learning_rate": 1.6819009983361064e-05, + "loss": 0.0602, + "step": 76500 + }, + { + "epoch": 32.0, + "eval_cer": 0.2661, + "eval_gen_len": 13.8924, + "eval_loss": 0.34633541107177734, + "eval_runtime": 271.365, + "eval_samples_per_second": 11.814, + "eval_steps_per_second": 2.955, + "step": 76928 + }, + { + "epoch": 32.03, + "learning_rate": 1.6798211314475874e-05, + "loss": 0.0575, + "step": 77000 + }, + { + "epoch": 32.24, + "learning_rate": 1.6777412645590683e-05, + "loss": 0.0506, + "step": 77500 + }, + { + "epoch": 32.45, + "learning_rate": 1.6756613976705493e-05, + "loss": 0.0566, + "step": 78000 + }, + { + "epoch": 32.65, + "learning_rate": 1.6735856905158072e-05, + "loss": 0.0532, + "step": 78500 + }, + { + "epoch": 32.86, + "learning_rate": 1.6715058236272882e-05, + "loss": 0.0568, + "step": 79000 + }, + { + "epoch": 33.0, + "eval_cer": 0.2645, + "eval_gen_len": 13.5443, + "eval_loss": 0.3539523482322693, + "eval_runtime": 256.805, + "eval_samples_per_second": 12.484, + "eval_steps_per_second": 3.123, + "step": 79332 + }, + { + "epoch": 33.07, + "learning_rate": 1.669425956738769e-05, + "loss": 0.057, + "step": 79500 + }, + { + "epoch": 33.28, + "learning_rate": 1.6673460898502497e-05, + "loss": 0.0467, + "step": 80000 + }, + { + "epoch": 33.49, + "learning_rate": 1.6652703826955077e-05, + "loss": 0.0514, + "step": 80500 + }, + { + "epoch": 33.69, + "learning_rate": 1.6631905158069886e-05, + "loss": 0.0522, + "step": 81000 + }, + { + "epoch": 33.9, + "learning_rate": 1.6611106489184692e-05, + "loss": 0.0528, + "step": 81500 + }, + { + "epoch": 34.0, + "eval_cer": 0.2621, + "eval_gen_len": 13.4984, + "eval_loss": 0.3489411473274231, + "eval_runtime": 254.1092, + "eval_samples_per_second": 12.617, + "eval_steps_per_second": 3.156, + "step": 81736 + }, + { + "epoch": 34.11, + "learning_rate": 1.6590307820299502e-05, + "loss": 0.05, + "step": 82000 + }, + { + "epoch": 34.32, + "learning_rate": 1.656955074875208e-05, + "loss": 0.0477, + "step": 82500 + }, + { + "epoch": 34.53, + "learning_rate": 1.654875207986689e-05, + "loss": 0.0567, + "step": 83000 + }, + { + "epoch": 34.73, + "learning_rate": 1.6527953410981697e-05, + "loss": 0.0477, + "step": 83500 + }, + { + "epoch": 34.94, + "learning_rate": 1.6507154742096506e-05, + "loss": 0.0544, + "step": 84000 + }, + { + "epoch": 35.0, + "eval_cer": 0.2662, + "eval_gen_len": 13.7679, + "eval_loss": 0.3497730493545532, + "eval_runtime": 268.9078, + "eval_samples_per_second": 11.922, + "eval_steps_per_second": 2.982, + "step": 84140 + }, + { + "epoch": 35.15, + "learning_rate": 1.6486356073211316e-05, + "loss": 0.0433, + "step": 84500 + }, + { + "epoch": 35.36, + "learning_rate": 1.6465599001663896e-05, + "loss": 0.0426, + "step": 85000 + }, + { + "epoch": 35.57, + "learning_rate": 1.64448003327787e-05, + "loss": 0.0477, + "step": 85500 + }, + { + "epoch": 35.77, + "learning_rate": 1.642400166389351e-05, + "loss": 0.0497, + "step": 86000 + }, + { + "epoch": 35.98, + "learning_rate": 1.640320299500832e-05, + "loss": 0.0571, + "step": 86500 + }, + { + "epoch": 36.0, + "eval_cer": 0.2661, + "eval_gen_len": 13.8001, + "eval_loss": 0.35858893394470215, + "eval_runtime": 266.9991, + "eval_samples_per_second": 12.008, + "eval_steps_per_second": 3.004, + "step": 86544 + }, + { + "epoch": 36.19, + "learning_rate": 1.63824459234609e-05, + "loss": 0.0406, + "step": 87000 + }, + { + "epoch": 36.4, + "learning_rate": 1.6361647254575706e-05, + "loss": 0.0431, + "step": 87500 + }, + { + "epoch": 36.61, + "learning_rate": 1.6340848585690516e-05, + "loss": 0.0507, + "step": 88000 + }, + { + "epoch": 36.81, + "learning_rate": 1.6320049916805325e-05, + "loss": 0.0492, + "step": 88500 + }, + { + "epoch": 37.0, + "eval_cer": 0.2564, + "eval_gen_len": 13.5415, + "eval_loss": 0.35140517354011536, + "eval_runtime": 259.4147, + "eval_samples_per_second": 12.359, + "eval_steps_per_second": 3.092, + "step": 88948 + }, + { + "epoch": 37.02, + "learning_rate": 1.6299251247920134e-05, + "loss": 0.0515, + "step": 89000 + }, + { + "epoch": 37.23, + "learning_rate": 1.6278452579034944e-05, + "loss": 0.0421, + "step": 89500 + }, + { + "epoch": 37.44, + "learning_rate": 1.6257695507487523e-05, + "loss": 0.0432, + "step": 90000 + }, + { + "epoch": 37.65, + "learning_rate": 1.6236896838602333e-05, + "loss": 0.0449, + "step": 90500 + }, + { + "epoch": 37.85, + "learning_rate": 1.621609816971714e-05, + "loss": 0.0484, + "step": 91000 + }, + { + "epoch": 38.0, + "eval_cer": 0.2639, + "eval_gen_len": 13.8534, + "eval_loss": 0.3746128976345062, + "eval_runtime": 254.0333, + "eval_samples_per_second": 12.62, + "eval_steps_per_second": 3.157, + "step": 91352 + }, + { + "epoch": 38.06, + "learning_rate": 1.6195299500831948e-05, + "loss": 0.0481, + "step": 91500 + }, + { + "epoch": 38.27, + "learning_rate": 1.6174500831946758e-05, + "loss": 0.0402, + "step": 92000 + }, + { + "epoch": 38.48, + "learning_rate": 1.6153743760399337e-05, + "loss": 0.0395, + "step": 92500 + }, + { + "epoch": 38.69, + "learning_rate": 1.6132945091514143e-05, + "loss": 0.0456, + "step": 93000 + }, + { + "epoch": 38.89, + "learning_rate": 1.6112146422628953e-05, + "loss": 0.0451, + "step": 93500 + }, + { + "epoch": 39.0, + "eval_cer": 0.2627, + "eval_gen_len": 13.7527, + "eval_loss": 0.35255342721939087, + "eval_runtime": 264.6452, + "eval_samples_per_second": 12.114, + "eval_steps_per_second": 3.03, + "step": 93756 + }, + { + "epoch": 39.1, + "learning_rate": 1.6091347753743762e-05, + "loss": 0.0363, + "step": 94000 + }, + { + "epoch": 39.31, + "learning_rate": 1.6070590682196342e-05, + "loss": 0.0382, + "step": 94500 + }, + { + "epoch": 39.52, + "learning_rate": 1.6049792013311148e-05, + "loss": 0.0448, + "step": 95000 + }, + { + "epoch": 39.73, + "learning_rate": 1.6028993344425957e-05, + "loss": 0.0399, + "step": 95500 + }, + { + "epoch": 39.93, + "learning_rate": 1.6008194675540767e-05, + "loss": 0.045, + "step": 96000 + }, + { + "epoch": 40.0, + "eval_cer": 0.2583, + "eval_gen_len": 13.6694, + "eval_loss": 0.3510436713695526, + "eval_runtime": 264.9384, + "eval_samples_per_second": 12.101, + "eval_steps_per_second": 3.027, + "step": 96160 + }, + { + "epoch": 40.14, + "learning_rate": 1.5987437603993347e-05, + "loss": 0.0393, + "step": 96500 + }, + { + "epoch": 40.35, + "learning_rate": 1.5966638935108153e-05, + "loss": 0.0338, + "step": 97000 + }, + { + "epoch": 40.56, + "learning_rate": 1.5945840266222962e-05, + "loss": 0.0398, + "step": 97500 + }, + { + "epoch": 40.77, + "learning_rate": 1.5925083194675542e-05, + "loss": 0.0425, + "step": 98000 + }, + { + "epoch": 40.97, + "learning_rate": 1.590428452579035e-05, + "loss": 0.0455, + "step": 98500 + }, + { + "epoch": 41.0, + "eval_cer": 0.2619, + "eval_gen_len": 13.7012, + "eval_loss": 0.3675419092178345, + "eval_runtime": 255.341, + "eval_samples_per_second": 12.556, + "eval_steps_per_second": 3.141, + "step": 98564 + }, + { + "epoch": 41.18, + "learning_rate": 1.5883485856905157e-05, + "loss": 0.0372, + "step": 99000 + }, + { + "epoch": 41.39, + "learning_rate": 1.5862687188019967e-05, + "loss": 0.0378, + "step": 99500 + }, + { + "epoch": 41.6, + "learning_rate": 1.5841888519134776e-05, + "loss": 0.0369, + "step": 100000 + }, + { + "epoch": 41.81, + "learning_rate": 1.5821089850249585e-05, + "loss": 0.0452, + "step": 100500 + }, + { + "epoch": 42.0, + "eval_cer": 0.2672, + "eval_gen_len": 13.8653, + "eval_loss": 0.3757382035255432, + "eval_runtime": 268.6077, + "eval_samples_per_second": 11.936, + "eval_steps_per_second": 2.986, + "step": 100968 + }, + { + "epoch": 42.01, + "learning_rate": 1.5800291181364395e-05, + "loss": 0.046, + "step": 101000 + }, + { + "epoch": 42.22, + "learning_rate": 1.57794925124792e-05, + "loss": 0.0319, + "step": 101500 + }, + { + "epoch": 42.43, + "learning_rate": 1.5758735440931784e-05, + "loss": 0.0383, + "step": 102000 + }, + { + "epoch": 42.64, + "learning_rate": 1.573793677204659e-05, + "loss": 0.0376, + "step": 102500 + }, + { + "epoch": 42.85, + "learning_rate": 1.57171381031614e-05, + "loss": 0.0403, + "step": 103000 + }, + { + "epoch": 43.0, + "eval_cer": 0.2583, + "eval_gen_len": 13.6532, + "eval_loss": 0.3421362340450287, + "eval_runtime": 257.7664, + "eval_samples_per_second": 12.438, + "eval_steps_per_second": 3.111, + "step": 103372 + }, + { + "epoch": 43.05, + "learning_rate": 1.5696339434276205e-05, + "loss": 0.0419, + "step": 103500 + }, + { + "epoch": 43.26, + "learning_rate": 1.5675540765391015e-05, + "loss": 0.0322, + "step": 104000 + }, + { + "epoch": 43.47, + "learning_rate": 1.5654783693843594e-05, + "loss": 0.0396, + "step": 104500 + }, + { + "epoch": 43.68, + "learning_rate": 1.5633985024958404e-05, + "loss": 0.0386, + "step": 105000 + }, + { + "epoch": 43.89, + "learning_rate": 1.5613186356073213e-05, + "loss": 0.0372, + "step": 105500 + }, + { + "epoch": 44.0, + "eval_cer": 0.2553, + "eval_gen_len": 13.7679, + "eval_loss": 0.37172210216522217, + "eval_runtime": 266.5505, + "eval_samples_per_second": 12.028, + "eval_steps_per_second": 3.009, + "step": 105776 + }, + { + "epoch": 44.09, + "learning_rate": 1.5592387687188023e-05, + "loss": 0.0381, + "step": 106000 + }, + { + "epoch": 44.3, + "learning_rate": 1.55716306156406e-05, + "loss": 0.0343, + "step": 106500 + }, + { + "epoch": 44.51, + "learning_rate": 1.555083194675541e-05, + "loss": 0.0348, + "step": 107000 + }, + { + "epoch": 44.72, + "learning_rate": 1.5530033277870218e-05, + "loss": 0.0412, + "step": 107500 + }, + { + "epoch": 44.93, + "learning_rate": 1.5509234608985027e-05, + "loss": 0.041, + "step": 108000 + }, + { + "epoch": 45.0, + "eval_cer": 0.258, + "eval_gen_len": 13.7246, + "eval_loss": 0.367112934589386, + "eval_runtime": 255.2355, + "eval_samples_per_second": 12.561, + "eval_steps_per_second": 3.142, + "step": 108180 + }, + { + "epoch": 45.13, + "learning_rate": 1.5488477537437604e-05, + "loss": 0.0358, + "step": 108500 + }, + { + "epoch": 45.34, + "learning_rate": 1.5467678868552413e-05, + "loss": 0.0347, + "step": 109000 + }, + { + "epoch": 45.55, + "learning_rate": 1.5446880199667222e-05, + "loss": 0.0372, + "step": 109500 + }, + { + "epoch": 45.76, + "learning_rate": 1.5426081530782032e-05, + "loss": 0.0369, + "step": 110000 + }, + { + "epoch": 45.97, + "learning_rate": 1.540528286189684e-05, + "loss": 0.0372, + "step": 110500 + }, + { + "epoch": 46.0, + "eval_cer": 0.2596, + "eval_gen_len": 13.8244, + "eval_loss": 0.36525318026542664, + "eval_runtime": 258.7606, + "eval_samples_per_second": 12.39, + "eval_steps_per_second": 3.099, + "step": 110584 + }, + { + "epoch": 46.17, + "learning_rate": 1.5384525790349418e-05, + "loss": 0.0276, + "step": 111000 + }, + { + "epoch": 46.38, + "learning_rate": 1.5363727121464227e-05, + "loss": 0.0311, + "step": 111500 + }, + { + "epoch": 46.59, + "learning_rate": 1.5342928452579036e-05, + "loss": 0.0343, + "step": 112000 + }, + { + "epoch": 46.8, + "learning_rate": 1.5322129783693846e-05, + "loss": 0.0418, + "step": 112500 + }, + { + "epoch": 47.0, + "eval_cer": 0.2573, + "eval_gen_len": 13.7427, + "eval_loss": 0.3767205476760864, + "eval_runtime": 259.1923, + "eval_samples_per_second": 12.369, + "eval_steps_per_second": 3.094, + "step": 112988 + }, + { + "epoch": 47.0, + "learning_rate": 1.5301372712146422e-05, + "loss": 0.0387, + "step": 113000 + }, + { + "epoch": 47.21, + "learning_rate": 1.528057404326123e-05, + "loss": 0.0288, + "step": 113500 + }, + { + "epoch": 47.42, + "learning_rate": 1.525977537437604e-05, + "loss": 0.0316, + "step": 114000 + }, + { + "epoch": 47.63, + "learning_rate": 1.5238976705490849e-05, + "loss": 0.0302, + "step": 114500 + }, + { + "epoch": 47.84, + "learning_rate": 1.521821963394343e-05, + "loss": 0.036, + "step": 115000 + }, + { + "epoch": 48.0, + "eval_cer": 0.2645, + "eval_gen_len": 13.9616, + "eval_loss": 0.3853040933609009, + "eval_runtime": 258.5981, + "eval_samples_per_second": 12.398, + "eval_steps_per_second": 3.101, + "step": 115392 + }, + { + "epoch": 48.04, + "learning_rate": 1.5197420965058238e-05, + "loss": 0.0414, + "step": 115500 + }, + { + "epoch": 48.25, + "learning_rate": 1.5176622296173047e-05, + "loss": 0.0302, + "step": 116000 + }, + { + "epoch": 48.46, + "learning_rate": 1.5155823627287855e-05, + "loss": 0.0336, + "step": 116500 + }, + { + "epoch": 48.67, + "learning_rate": 1.5135066555740435e-05, + "loss": 0.0346, + "step": 117000 + }, + { + "epoch": 48.88, + "learning_rate": 1.5114267886855242e-05, + "loss": 0.0354, + "step": 117500 + }, + { + "epoch": 49.0, + "eval_cer": 0.2571, + "eval_gen_len": 13.3799, + "eval_loss": 0.3713897466659546, + "eval_runtime": 256.4932, + "eval_samples_per_second": 12.499, + "eval_steps_per_second": 3.127, + "step": 117796 + }, + { + "epoch": 49.08, + "learning_rate": 1.5093469217970052e-05, + "loss": 0.0285, + "step": 118000 + }, + { + "epoch": 49.29, + "learning_rate": 1.507267054908486e-05, + "loss": 0.0311, + "step": 118500 + }, + { + "epoch": 49.5, + "learning_rate": 1.5051871880199669e-05, + "loss": 0.0297, + "step": 119000 + }, + { + "epoch": 49.71, + "learning_rate": 1.5031114808652247e-05, + "loss": 0.0327, + "step": 119500 + }, + { + "epoch": 49.92, + "learning_rate": 1.5010316139767056e-05, + "loss": 0.0336, + "step": 120000 + }, + { + "epoch": 50.0, + "eval_cer": 0.2592, + "eval_gen_len": 13.7667, + "eval_loss": 0.38062140345573425, + "eval_runtime": 257.9154, + "eval_samples_per_second": 12.43, + "eval_steps_per_second": 3.11, + "step": 120200 + }, + { + "epoch": 50.12, + "learning_rate": 1.4989517470881864e-05, + "loss": 0.0291, + "step": 120500 + }, + { + "epoch": 50.33, + "learning_rate": 1.4968718801996673e-05, + "loss": 0.0286, + "step": 121000 + }, + { + "epoch": 50.54, + "learning_rate": 1.4947920133111483e-05, + "loss": 0.0282, + "step": 121500 + }, + { + "epoch": 50.75, + "learning_rate": 1.4927163061564061e-05, + "loss": 0.0299, + "step": 122000 + }, + { + "epoch": 50.96, + "learning_rate": 1.4906364392678869e-05, + "loss": 0.0367, + "step": 122500 + }, + { + "epoch": 51.0, + "eval_cer": 0.2567, + "eval_gen_len": 13.7402, + "eval_loss": 0.36996766924858093, + "eval_runtime": 265.273, + "eval_samples_per_second": 12.086, + "eval_steps_per_second": 3.023, + "step": 122604 + }, + { + "epoch": 51.16, + "learning_rate": 1.4885607321131449e-05, + "loss": 0.0269, + "step": 123000 + }, + { + "epoch": 51.37, + "learning_rate": 1.4864808652246256e-05, + "loss": 0.0288, + "step": 123500 + }, + { + "epoch": 51.58, + "learning_rate": 1.4844009983361066e-05, + "loss": 0.0306, + "step": 124000 + }, + { + "epoch": 51.79, + "learning_rate": 1.4823211314475873e-05, + "loss": 0.0352, + "step": 124500 + }, + { + "epoch": 52.0, + "learning_rate": 1.4802412645590683e-05, + "loss": 0.0278, + "step": 125000 + }, + { + "epoch": 52.0, + "eval_cer": 0.2552, + "eval_gen_len": 13.6507, + "eval_loss": 0.37308937311172485, + "eval_runtime": 264.0574, + "eval_samples_per_second": 12.141, + "eval_steps_per_second": 3.037, + "step": 125008 + }, + { + "epoch": 52.2, + "learning_rate": 1.478161397670549e-05, + "loss": 0.0245, + "step": 125500 + }, + { + "epoch": 52.41, + "learning_rate": 1.47608153078203e-05, + "loss": 0.0256, + "step": 126000 + }, + { + "epoch": 52.62, + "learning_rate": 1.4740016638935109e-05, + "loss": 0.0303, + "step": 126500 + }, + { + "epoch": 52.83, + "learning_rate": 1.4719259567387689e-05, + "loss": 0.0318, + "step": 127000 + }, + { + "epoch": 53.0, + "eval_cer": 0.2558, + "eval_gen_len": 13.4878, + "eval_loss": 0.3711611330509186, + "eval_runtime": 256.1423, + "eval_samples_per_second": 12.516, + "eval_steps_per_second": 3.131, + "step": 127412 + }, + { + "epoch": 53.04, + "learning_rate": 1.4698460898502498e-05, + "loss": 0.0308, + "step": 127500 + }, + { + "epoch": 53.24, + "learning_rate": 1.4677662229617306e-05, + "loss": 0.0258, + "step": 128000 + }, + { + "epoch": 53.45, + "learning_rate": 1.4656863560732115e-05, + "loss": 0.0268, + "step": 128500 + }, + { + "epoch": 53.66, + "learning_rate": 1.4636064891846925e-05, + "loss": 0.0306, + "step": 129000 + }, + { + "epoch": 53.87, + "learning_rate": 1.4615307820299503e-05, + "loss": 0.0307, + "step": 129500 + }, + { + "epoch": 54.0, + "eval_cer": 0.2545, + "eval_gen_len": 13.6042, + "eval_loss": 0.3638122081756592, + "eval_runtime": 259.1147, + "eval_samples_per_second": 12.373, + "eval_steps_per_second": 3.095, + "step": 129816 + }, + { + "epoch": 54.08, + "learning_rate": 1.459450915141431e-05, + "loss": 0.0246, + "step": 130000 + }, + { + "epoch": 54.28, + "learning_rate": 1.457371048252912e-05, + "loss": 0.0206, + "step": 130500 + }, + { + "epoch": 54.49, + "learning_rate": 1.455291181364393e-05, + "loss": 0.0298, + "step": 131000 + }, + { + "epoch": 54.7, + "learning_rate": 1.4532196339434276e-05, + "loss": 0.0284, + "step": 131500 + }, + { + "epoch": 54.91, + "learning_rate": 1.4511397670549086e-05, + "loss": 0.0277, + "step": 132000 + }, + { + "epoch": 55.0, + "eval_cer": 0.2574, + "eval_gen_len": 13.8247, + "eval_loss": 0.3866593539714813, + "eval_runtime": 266.2003, + "eval_samples_per_second": 12.044, + "eval_steps_per_second": 3.013, + "step": 132220 + }, + { + "epoch": 55.12, + "learning_rate": 1.4490599001663895e-05, + "loss": 0.0258, + "step": 132500 + }, + { + "epoch": 55.32, + "learning_rate": 1.4469800332778703e-05, + "loss": 0.0264, + "step": 133000 + }, + { + "epoch": 55.53, + "learning_rate": 1.4449001663893512e-05, + "loss": 0.0273, + "step": 133500 + }, + { + "epoch": 55.74, + "learning_rate": 1.442820299500832e-05, + "loss": 0.033, + "step": 134000 + }, + { + "epoch": 55.95, + "learning_rate": 1.4407404326123129e-05, + "loss": 0.0289, + "step": 134500 + }, + { + "epoch": 56.0, + "eval_cer": 0.26, + "eval_gen_len": 13.9024, + "eval_loss": 0.38215455412864685, + "eval_runtime": 268.768, + "eval_samples_per_second": 11.929, + "eval_steps_per_second": 2.984, + "step": 134624 + }, + { + "epoch": 56.16, + "learning_rate": 1.4386605657237937e-05, + "loss": 0.0263, + "step": 135000 + }, + { + "epoch": 56.36, + "learning_rate": 1.4365848585690517e-05, + "loss": 0.0227, + "step": 135500 + }, + { + "epoch": 56.57, + "learning_rate": 1.4345049916805324e-05, + "loss": 0.0276, + "step": 136000 + }, + { + "epoch": 56.78, + "learning_rate": 1.4324251247920134e-05, + "loss": 0.0277, + "step": 136500 + }, + { + "epoch": 56.99, + "learning_rate": 1.4303494176372714e-05, + "loss": 0.0259, + "step": 137000 + }, + { + "epoch": 57.0, + "eval_cer": 0.2541, + "eval_gen_len": 13.6722, + "eval_loss": 0.3895968198776245, + "eval_runtime": 256.4716, + "eval_samples_per_second": 12.5, + "eval_steps_per_second": 3.127, + "step": 137028 + }, + { + "epoch": 57.2, + "learning_rate": 1.4282695507487523e-05, + "loss": 0.0202, + "step": 137500 + }, + { + "epoch": 57.4, + "learning_rate": 1.4261896838602332e-05, + "loss": 0.0236, + "step": 138000 + }, + { + "epoch": 57.61, + "learning_rate": 1.424109816971714e-05, + "loss": 0.0288, + "step": 138500 + }, + { + "epoch": 57.82, + "learning_rate": 1.422029950083195e-05, + "loss": 0.0277, + "step": 139000 + }, + { + "epoch": 58.0, + "eval_cer": 0.2584, + "eval_gen_len": 13.7208, + "eval_loss": 0.38816508650779724, + "eval_runtime": 264.0106, + "eval_samples_per_second": 12.143, + "eval_steps_per_second": 3.038, + "step": 139432 + }, + { + "epoch": 58.03, + "learning_rate": 1.4199500831946757e-05, + "loss": 0.0297, + "step": 139500 + }, + { + "epoch": 58.24, + "learning_rate": 1.4178702163061566e-05, + "loss": 0.0206, + "step": 140000 + }, + { + "epoch": 58.44, + "learning_rate": 1.4157945091514145e-05, + "loss": 0.0245, + "step": 140500 + }, + { + "epoch": 58.65, + "learning_rate": 1.4137146422628954e-05, + "loss": 0.0264, + "step": 141000 + }, + { + "epoch": 58.86, + "learning_rate": 1.4116347753743762e-05, + "loss": 0.0289, + "step": 141500 + }, + { + "epoch": 59.0, + "eval_cer": 0.2587, + "eval_gen_len": 13.7096, + "eval_loss": 0.39096423983573914, + "eval_runtime": 260.1849, + "eval_samples_per_second": 12.322, + "eval_steps_per_second": 3.082, + "step": 141836 + }, + { + "epoch": 59.07, + "learning_rate": 1.4095549084858571e-05, + "loss": 0.0243, + "step": 142000 + }, + { + "epoch": 59.28, + "learning_rate": 1.4074750415973379e-05, + "loss": 0.0218, + "step": 142500 + }, + { + "epoch": 59.48, + "learning_rate": 1.4053951747088188e-05, + "loss": 0.0241, + "step": 143000 + }, + { + "epoch": 59.69, + "learning_rate": 1.4033194675540766e-05, + "loss": 0.0242, + "step": 143500 + }, + { + "epoch": 59.9, + "learning_rate": 1.4012396006655576e-05, + "loss": 0.0252, + "step": 144000 + }, + { + "epoch": 60.0, + "eval_cer": 0.2515, + "eval_gen_len": 13.6971, + "eval_loss": 0.38846734166145325, + "eval_runtime": 258.3853, + "eval_samples_per_second": 12.408, + "eval_steps_per_second": 3.104, + "step": 144240 + }, + { + "epoch": 60.11, + "learning_rate": 1.3991597337770383e-05, + "loss": 0.0259, + "step": 144500 + }, + { + "epoch": 60.32, + "learning_rate": 1.3970798668885193e-05, + "loss": 0.0208, + "step": 145000 + }, + { + "epoch": 60.52, + "learning_rate": 1.3950000000000002e-05, + "loss": 0.0252, + "step": 145500 + }, + { + "epoch": 60.73, + "learning_rate": 1.392920133111481e-05, + "loss": 0.0253, + "step": 146000 + }, + { + "epoch": 60.94, + "learning_rate": 1.3908402662229619e-05, + "loss": 0.0265, + "step": 146500 + }, + { + "epoch": 61.0, + "eval_cer": 0.2569, + "eval_gen_len": 13.85, + "eval_loss": 0.3915986716747284, + "eval_runtime": 271.1284, + "eval_samples_per_second": 11.825, + "eval_steps_per_second": 2.958, + "step": 146644 + }, + { + "epoch": 61.15, + "learning_rate": 1.3887603993344427e-05, + "loss": 0.0232, + "step": 147000 + }, + { + "epoch": 61.36, + "learning_rate": 1.3866846921797007e-05, + "loss": 0.0233, + "step": 147500 + }, + { + "epoch": 61.56, + "learning_rate": 1.3846048252911814e-05, + "loss": 0.0224, + "step": 148000 + }, + { + "epoch": 61.77, + "learning_rate": 1.3825249584026624e-05, + "loss": 0.0229, + "step": 148500 + }, + { + "epoch": 61.98, + "learning_rate": 1.3804450915141431e-05, + "loss": 0.0229, + "step": 149000 + }, + { + "epoch": 62.0, + "eval_cer": 0.2565, + "eval_gen_len": 13.8206, + "eval_loss": 0.3992536962032318, + "eval_runtime": 268.8678, + "eval_samples_per_second": 11.924, + "eval_steps_per_second": 2.983, + "step": 149048 + }, + { + "epoch": 62.19, + "learning_rate": 1.378365224625624e-05, + "loss": 0.0199, + "step": 149500 + }, + { + "epoch": 62.4, + "learning_rate": 1.3762895174708819e-05, + "loss": 0.0239, + "step": 150000 + }, + { + "epoch": 62.6, + "learning_rate": 1.3742096505823628e-05, + "loss": 0.0261, + "step": 150500 + }, + { + "epoch": 62.81, + "learning_rate": 1.3721297836938436e-05, + "loss": 0.0225, + "step": 151000 + }, + { + "epoch": 63.0, + "eval_cer": 0.2507, + "eval_gen_len": 13.6354, + "eval_loss": 0.3880002200603485, + "eval_runtime": 264.7668, + "eval_samples_per_second": 12.109, + "eval_steps_per_second": 3.029, + "step": 151452 + }, + { + "epoch": 63.02, + "learning_rate": 1.3700499168053245e-05, + "loss": 0.0268, + "step": 151500 + }, + { + "epoch": 63.23, + "learning_rate": 1.3679742096505825e-05, + "loss": 0.0188, + "step": 152000 + }, + { + "epoch": 63.44, + "learning_rate": 1.3658985024958405e-05, + "loss": 0.022, + "step": 152500 + }, + { + "epoch": 63.64, + "learning_rate": 1.3638186356073213e-05, + "loss": 0.0232, + "step": 153000 + }, + { + "epoch": 63.85, + "learning_rate": 1.3617387687188022e-05, + "loss": 0.0221, + "step": 153500 + }, + { + "epoch": 64.0, + "eval_cer": 0.2583, + "eval_gen_len": 13.9485, + "eval_loss": 0.3893982470035553, + "eval_runtime": 270.1502, + "eval_samples_per_second": 11.867, + "eval_steps_per_second": 2.969, + "step": 153856 + }, + { + "epoch": 64.06, + "learning_rate": 1.359658901830283e-05, + "loss": 0.0268, + "step": 154000 + }, + { + "epoch": 64.27, + "learning_rate": 1.3575790349417639e-05, + "loss": 0.0159, + "step": 154500 + }, + { + "epoch": 64.48, + "learning_rate": 1.3554991680532448e-05, + "loss": 0.0191, + "step": 155000 + }, + { + "epoch": 64.68, + "learning_rate": 1.3534193011647256e-05, + "loss": 0.02, + "step": 155500 + }, + { + "epoch": 64.89, + "learning_rate": 1.3513394342762065e-05, + "loss": 0.0234, + "step": 156000 + }, + { + "epoch": 65.0, + "eval_cer": 0.2515, + "eval_gen_len": 13.6329, + "eval_loss": 0.39693862199783325, + "eval_runtime": 262.2666, + "eval_samples_per_second": 12.224, + "eval_steps_per_second": 3.058, + "step": 156260 + }, + { + "epoch": 65.1, + "learning_rate": 1.3492595673876873e-05, + "loss": 0.0225, + "step": 156500 + }, + { + "epoch": 65.31, + "learning_rate": 1.3471838602329453e-05, + "loss": 0.0219, + "step": 157000 + }, + { + "epoch": 65.52, + "learning_rate": 1.3451081530782031e-05, + "loss": 0.0224, + "step": 157500 + }, + { + "epoch": 65.72, + "learning_rate": 1.3430282861896839e-05, + "loss": 0.0222, + "step": 158000 + }, + { + "epoch": 65.93, + "learning_rate": 1.3409484193011648e-05, + "loss": 0.0251, + "step": 158500 + }, + { + "epoch": 66.0, + "eval_cer": 0.2524, + "eval_gen_len": 13.9046, + "eval_loss": 0.3977407217025757, + "eval_runtime": 268.0146, + "eval_samples_per_second": 11.962, + "eval_steps_per_second": 2.992, + "step": 158664 + }, + { + "epoch": 66.14, + "learning_rate": 1.3388685524126458e-05, + "loss": 0.0181, + "step": 159000 + }, + { + "epoch": 66.35, + "learning_rate": 1.3367886855241265e-05, + "loss": 0.0194, + "step": 159500 + }, + { + "epoch": 66.56, + "learning_rate": 1.3347129783693843e-05, + "loss": 0.0209, + "step": 160000 + }, + { + "epoch": 66.76, + "learning_rate": 1.3326331114808653e-05, + "loss": 0.0235, + "step": 160500 + }, + { + "epoch": 66.97, + "learning_rate": 1.3305532445923462e-05, + "loss": 0.0253, + "step": 161000 + }, + { + "epoch": 67.0, + "eval_cer": 0.2511, + "eval_gen_len": 13.6563, + "eval_loss": 0.40182340145111084, + "eval_runtime": 252.4386, + "eval_samples_per_second": 12.7, + "eval_steps_per_second": 3.177, + "step": 161068 + }, + { + "epoch": 67.18, + "learning_rate": 1.328473377703827e-05, + "loss": 0.0218, + "step": 161500 + }, + { + "epoch": 67.39, + "learning_rate": 1.326393510815308e-05, + "loss": 0.0175, + "step": 162000 + }, + { + "epoch": 67.6, + "learning_rate": 1.3243136439267887e-05, + "loss": 0.0204, + "step": 162500 + }, + { + "epoch": 67.8, + "learning_rate": 1.3222337770382696e-05, + "loss": 0.0197, + "step": 163000 + }, + { + "epoch": 68.0, + "eval_cer": 0.2559, + "eval_gen_len": 14.0237, + "eval_loss": 0.3884351849555969, + "eval_runtime": 272.5844, + "eval_samples_per_second": 11.761, + "eval_steps_per_second": 2.942, + "step": 163472 + }, + { + "epoch": 68.01, + "learning_rate": 1.3201539101497504e-05, + "loss": 0.0241, + "step": 163500 + }, + { + "epoch": 68.22, + "learning_rate": 1.3180782029950084e-05, + "loss": 0.0179, + "step": 164000 + }, + { + "epoch": 68.43, + "learning_rate": 1.3159983361064892e-05, + "loss": 0.0175, + "step": 164500 + }, + { + "epoch": 68.64, + "learning_rate": 1.3139184692179701e-05, + "loss": 0.0227, + "step": 165000 + }, + { + "epoch": 68.84, + "learning_rate": 1.3118386023294509e-05, + "loss": 0.0219, + "step": 165500 + }, + { + "epoch": 69.0, + "eval_cer": 0.2519, + "eval_gen_len": 13.5009, + "eval_loss": 0.402326375246048, + "eval_runtime": 254.8714, + "eval_samples_per_second": 12.579, + "eval_steps_per_second": 3.147, + "step": 165876 + }, + { + "epoch": 69.05, + "learning_rate": 1.309762895174709e-05, + "loss": 0.0229, + "step": 166000 + }, + { + "epoch": 69.26, + "learning_rate": 1.30768302828619e-05, + "loss": 0.0183, + "step": 166500 + }, + { + "epoch": 69.47, + "learning_rate": 1.3056073211314478e-05, + "loss": 0.0195, + "step": 167000 + }, + { + "epoch": 69.68, + "learning_rate": 1.3035274542429285e-05, + "loss": 0.0204, + "step": 167500 + }, + { + "epoch": 69.88, + "learning_rate": 1.3014475873544095e-05, + "loss": 0.0207, + "step": 168000 + }, + { + "epoch": 70.0, + "eval_cer": 0.2559, + "eval_gen_len": 13.4392, + "eval_loss": 0.390462189912796, + "eval_runtime": 247.3478, + "eval_samples_per_second": 12.962, + "eval_steps_per_second": 3.242, + "step": 168280 + }, + { + "epoch": 70.09, + "learning_rate": 1.2993677204658904e-05, + "loss": 0.0216, + "step": 168500 + }, + { + "epoch": 70.3, + "learning_rate": 1.2972878535773712e-05, + "loss": 0.0192, + "step": 169000 + }, + { + "epoch": 70.51, + "learning_rate": 1.2952079866888521e-05, + "loss": 0.0192, + "step": 169500 + }, + { + "epoch": 70.72, + "learning_rate": 1.2931281198003329e-05, + "loss": 0.0187, + "step": 170000 + }, + { + "epoch": 70.92, + "learning_rate": 1.2910524126455907e-05, + "loss": 0.0233, + "step": 170500 + }, + { + "epoch": 71.0, + "eval_cer": 0.2574, + "eval_gen_len": 13.7012, + "eval_loss": 0.4090117812156677, + "eval_runtime": 257.6398, + "eval_samples_per_second": 12.444, + "eval_steps_per_second": 3.113, + "step": 170684 + }, + { + "epoch": 71.13, + "learning_rate": 1.2889725457570716e-05, + "loss": 0.019, + "step": 171000 + }, + { + "epoch": 71.34, + "learning_rate": 1.2868926788685526e-05, + "loss": 0.0159, + "step": 171500 + }, + { + "epoch": 71.55, + "learning_rate": 1.2848128119800333e-05, + "loss": 0.0213, + "step": 172000 + }, + { + "epoch": 71.76, + "learning_rate": 1.2827329450915143e-05, + "loss": 0.0177, + "step": 172500 + }, + { + "epoch": 71.96, + "learning_rate": 1.280653078202995e-05, + "loss": 0.024, + "step": 173000 + }, + { + "epoch": 72.0, + "eval_cer": 0.2472, + "eval_gen_len": 13.6382, + "eval_loss": 0.38612431287765503, + "eval_runtime": 258.2841, + "eval_samples_per_second": 12.413, + "eval_steps_per_second": 3.105, + "step": 173088 + }, + { + "epoch": 72.17, + "learning_rate": 1.278577371048253e-05, + "loss": 0.0176, + "step": 173500 + }, + { + "epoch": 72.38, + "learning_rate": 1.2764975041597338e-05, + "loss": 0.017, + "step": 174000 + }, + { + "epoch": 72.59, + "learning_rate": 1.2744176372712147e-05, + "loss": 0.0167, + "step": 174500 + }, + { + "epoch": 72.8, + "learning_rate": 1.2723377703826955e-05, + "loss": 0.0219, + "step": 175000 + }, + { + "epoch": 73.0, + "eval_cer": 0.2647, + "eval_gen_len": 14.1634, + "eval_loss": 0.4713122546672821, + "eval_runtime": 271.6072, + "eval_samples_per_second": 11.804, + "eval_steps_per_second": 2.953, + "step": 175492 + }, + { + "epoch": 73.0, + "learning_rate": 1.2702579034941764e-05, + "loss": 0.0198, + "step": 175500 + }, + { + "epoch": 73.21, + "learning_rate": 1.2681780366056574e-05, + "loss": 0.0147, + "step": 176000 + }, + { + "epoch": 73.42, + "learning_rate": 1.2660981697171381e-05, + "loss": 0.0181, + "step": 176500 + }, + { + "epoch": 73.63, + "learning_rate": 1.264018302828619e-05, + "loss": 0.02, + "step": 177000 + }, + { + "epoch": 73.84, + "learning_rate": 1.2619384359400998e-05, + "loss": 0.0189, + "step": 177500 + }, + { + "epoch": 74.0, + "eval_cer": 0.2536, + "eval_gen_len": 13.9267, + "eval_loss": 0.39804303646087646, + "eval_runtime": 266.4263, + "eval_samples_per_second": 12.033, + "eval_steps_per_second": 3.01, + "step": 177896 + }, + { + "epoch": 74.04, + "learning_rate": 1.2598627287853578e-05, + "loss": 0.0215, + "step": 178000 + }, + { + "epoch": 74.25, + "learning_rate": 1.2577828618968386e-05, + "loss": 0.0159, + "step": 178500 + }, + { + "epoch": 74.46, + "learning_rate": 1.2557029950083195e-05, + "loss": 0.0188, + "step": 179000 + }, + { + "epoch": 74.67, + "learning_rate": 1.2536231281198003e-05, + "loss": 0.0169, + "step": 179500 + }, + { + "epoch": 74.88, + "learning_rate": 1.2515474209650585e-05, + "loss": 0.0162, + "step": 180000 + }, + { + "epoch": 75.0, + "eval_cer": 0.2529, + "eval_gen_len": 13.6974, + "eval_loss": 0.39674171805381775, + "eval_runtime": 262.8858, + "eval_samples_per_second": 12.195, + "eval_steps_per_second": 3.051, + "step": 180300 + }, + { + "epoch": 75.08, + "learning_rate": 1.2494675540765392e-05, + "loss": 0.0193, + "step": 180500 + }, + { + "epoch": 75.29, + "learning_rate": 1.2473876871880202e-05, + "loss": 0.0162, + "step": 181000 + }, + { + "epoch": 75.5, + "learning_rate": 1.2453078202995011e-05, + "loss": 0.0174, + "step": 181500 + }, + { + "epoch": 75.71, + "learning_rate": 1.243232113144759e-05, + "loss": 0.017, + "step": 182000 + }, + { + "epoch": 75.92, + "learning_rate": 1.2411522462562397e-05, + "loss": 0.0183, + "step": 182500 + }, + { + "epoch": 76.0, + "eval_cer": 0.2512, + "eval_gen_len": 13.708, + "eval_loss": 0.42503321170806885, + "eval_runtime": 262.6259, + "eval_samples_per_second": 12.207, + "eval_steps_per_second": 3.054, + "step": 182704 + }, + { + "epoch": 76.12, + "learning_rate": 1.2390765391014977e-05, + "loss": 0.0172, + "step": 183000 + }, + { + "epoch": 76.33, + "learning_rate": 1.2369966722129784e-05, + "loss": 0.0164, + "step": 183500 + }, + { + "epoch": 76.54, + "learning_rate": 1.2349168053244594e-05, + "loss": 0.0179, + "step": 184000 + }, + { + "epoch": 76.75, + "learning_rate": 1.2328369384359401e-05, + "loss": 0.0181, + "step": 184500 + }, + { + "epoch": 76.96, + "learning_rate": 1.230757071547421e-05, + "loss": 0.0202, + "step": 185000 + }, + { + "epoch": 77.0, + "eval_cer": 0.2548, + "eval_gen_len": 13.8085, + "eval_loss": 0.4239508807659149, + "eval_runtime": 252.2131, + "eval_samples_per_second": 12.711, + "eval_steps_per_second": 3.18, + "step": 185108 + }, + { + "epoch": 77.16, + "learning_rate": 1.228677204658902e-05, + "loss": 0.015, + "step": 185500 + }, + { + "epoch": 77.37, + "learning_rate": 1.2265973377703828e-05, + "loss": 0.0148, + "step": 186000 + }, + { + "epoch": 77.58, + "learning_rate": 1.2245174708818637e-05, + "loss": 0.0179, + "step": 186500 + }, + { + "epoch": 77.79, + "learning_rate": 1.2224417637271215e-05, + "loss": 0.019, + "step": 187000 + }, + { + "epoch": 78.0, + "learning_rate": 1.2203660565723794e-05, + "loss": 0.0186, + "step": 187500 + }, + { + "epoch": 78.0, + "eval_cer": 0.2522, + "eval_gen_len": 13.806, + "eval_loss": 0.4266161620616913, + "eval_runtime": 261.9494, + "eval_samples_per_second": 12.239, + "eval_steps_per_second": 3.062, + "step": 187512 + }, + { + "epoch": 78.2, + "learning_rate": 1.2182861896838603e-05, + "loss": 0.0155, + "step": 188000 + }, + { + "epoch": 78.41, + "learning_rate": 1.216206322795341e-05, + "loss": 0.0167, + "step": 188500 + }, + { + "epoch": 78.62, + "learning_rate": 1.214126455906822e-05, + "loss": 0.0176, + "step": 189000 + }, + { + "epoch": 78.83, + "learning_rate": 1.212046589018303e-05, + "loss": 0.016, + "step": 189500 + }, + { + "epoch": 79.0, + "eval_cer": 0.2499, + "eval_gen_len": 13.713, + "eval_loss": 0.4089159667491913, + "eval_runtime": 255.1636, + "eval_samples_per_second": 12.564, + "eval_steps_per_second": 3.143, + "step": 189916 + }, + { + "epoch": 79.03, + "learning_rate": 1.2099667221297837e-05, + "loss": 0.0165, + "step": 190000 + }, + { + "epoch": 79.24, + "learning_rate": 1.2078868552412646e-05, + "loss": 0.0136, + "step": 190500 + }, + { + "epoch": 79.45, + "learning_rate": 1.2058111480865226e-05, + "loss": 0.0154, + "step": 191000 + }, + { + "epoch": 79.66, + "learning_rate": 1.2037312811980036e-05, + "loss": 0.0168, + "step": 191500 + }, + { + "epoch": 79.87, + "learning_rate": 1.2016514143094843e-05, + "loss": 0.0188, + "step": 192000 + }, + { + "epoch": 80.0, + "eval_cer": 0.2501, + "eval_gen_len": 13.7745, + "eval_loss": 0.41349881887435913, + "eval_runtime": 263.2542, + "eval_samples_per_second": 12.178, + "eval_steps_per_second": 3.046, + "step": 192320 + }, + { + "epoch": 80.07, + "learning_rate": 1.1995715474209653e-05, + "loss": 0.0169, + "step": 192500 + }, + { + "epoch": 80.28, + "learning_rate": 1.1974916805324459e-05, + "loss": 0.0142, + "step": 193000 + }, + { + "epoch": 80.49, + "learning_rate": 1.1954118136439268e-05, + "loss": 0.0186, + "step": 193500 + }, + { + "epoch": 80.7, + "learning_rate": 1.1933319467554076e-05, + "loss": 0.0173, + "step": 194000 + }, + { + "epoch": 80.91, + "learning_rate": 1.1912520798668885e-05, + "loss": 0.016, + "step": 194500 + }, + { + "epoch": 81.0, + "eval_cer": 0.2477, + "eval_gen_len": 13.6622, + "eval_loss": 0.3864258825778961, + "eval_runtime": 259.6006, + "eval_samples_per_second": 12.35, + "eval_steps_per_second": 3.089, + "step": 194724 + }, + { + "epoch": 81.11, + "learning_rate": 1.1891763727121467e-05, + "loss": 0.0166, + "step": 195000 + }, + { + "epoch": 81.32, + "learning_rate": 1.1870965058236274e-05, + "loss": 0.0141, + "step": 195500 + }, + { + "epoch": 81.53, + "learning_rate": 1.1850207986688853e-05, + "loss": 0.0151, + "step": 196000 + }, + { + "epoch": 81.74, + "learning_rate": 1.1829409317803662e-05, + "loss": 0.0176, + "step": 196500 + }, + { + "epoch": 81.95, + "learning_rate": 1.180861064891847e-05, + "loss": 0.0156, + "step": 197000 + }, + { + "epoch": 82.0, + "eval_cer": 0.2551, + "eval_gen_len": 13.9261, + "eval_loss": 0.4278740882873535, + "eval_runtime": 261.2997, + "eval_samples_per_second": 12.269, + "eval_steps_per_second": 3.069, + "step": 197128 + }, + { + "epoch": 82.15, + "learning_rate": 1.1787811980033279e-05, + "loss": 0.0173, + "step": 197500 + }, + { + "epoch": 82.36, + "learning_rate": 1.1767013311148088e-05, + "loss": 0.0153, + "step": 198000 + }, + { + "epoch": 82.57, + "learning_rate": 1.1746214642262896e-05, + "loss": 0.016, + "step": 198500 + }, + { + "epoch": 82.78, + "learning_rate": 1.1725415973377705e-05, + "loss": 0.0136, + "step": 199000 + }, + { + "epoch": 82.99, + "learning_rate": 1.1704617304492513e-05, + "loss": 0.018, + "step": 199500 + }, + { + "epoch": 83.0, + "eval_cer": 0.25, + "eval_gen_len": 13.704, + "eval_loss": 0.4216358959674835, + "eval_runtime": 257.2522, + "eval_samples_per_second": 12.462, + "eval_steps_per_second": 3.118, + "step": 199532 + }, + { + "epoch": 83.19, + "learning_rate": 1.1683818635607322e-05, + "loss": 0.0125, + "step": 200000 + }, + { + "epoch": 83.4, + "learning_rate": 1.16630615640599e-05, + "loss": 0.0158, + "step": 200500 + }, + { + "epoch": 83.61, + "learning_rate": 1.1642304492512479e-05, + "loss": 0.0153, + "step": 201000 + }, + { + "epoch": 83.82, + "learning_rate": 1.1621505823627288e-05, + "loss": 0.0159, + "step": 201500 + }, + { + "epoch": 84.0, + "eval_cer": 0.2502, + "eval_gen_len": 13.7121, + "eval_loss": 0.42220476269721985, + "eval_runtime": 260.9684, + "eval_samples_per_second": 12.285, + "eval_steps_per_second": 3.073, + "step": 201936 + }, + { + "epoch": 84.03, + "learning_rate": 1.1600707154742097e-05, + "loss": 0.0159, + "step": 202000 + }, + { + "epoch": 84.23, + "learning_rate": 1.1579908485856905e-05, + "loss": 0.0157, + "step": 202500 + }, + { + "epoch": 84.44, + "learning_rate": 1.1559109816971715e-05, + "loss": 0.0149, + "step": 203000 + }, + { + "epoch": 84.65, + "learning_rate": 1.1538311148086522e-05, + "loss": 0.0148, + "step": 203500 + }, + { + "epoch": 84.86, + "learning_rate": 1.1517512479201332e-05, + "loss": 0.0165, + "step": 204000 + }, + { + "epoch": 85.0, + "eval_cer": 0.2482, + "eval_gen_len": 13.7233, + "eval_loss": 0.4069821238517761, + "eval_runtime": 256.2579, + "eval_samples_per_second": 12.511, + "eval_steps_per_second": 3.13, + "step": 204340 + }, + { + "epoch": 85.07, + "learning_rate": 1.149675540765391e-05, + "loss": 0.0162, + "step": 204500 + }, + { + "epoch": 85.27, + "learning_rate": 1.1475956738768719e-05, + "loss": 0.0128, + "step": 205000 + }, + { + "epoch": 85.48, + "learning_rate": 1.1455158069883527e-05, + "loss": 0.0149, + "step": 205500 + }, + { + "epoch": 85.69, + "learning_rate": 1.1434359400998336e-05, + "loss": 0.0182, + "step": 206000 + }, + { + "epoch": 85.9, + "learning_rate": 1.1413560732113146e-05, + "loss": 0.0149, + "step": 206500 + }, + { + "epoch": 86.0, + "eval_cer": 0.2493, + "eval_gen_len": 13.7623, + "eval_loss": 0.4060095250606537, + "eval_runtime": 264.6587, + "eval_samples_per_second": 12.114, + "eval_steps_per_second": 3.03, + "step": 206744 + }, + { + "epoch": 86.11, + "learning_rate": 1.1392762063227953e-05, + "loss": 0.0131, + "step": 207000 + }, + { + "epoch": 86.31, + "learning_rate": 1.1371963394342763e-05, + "loss": 0.0145, + "step": 207500 + }, + { + "epoch": 86.52, + "learning_rate": 1.135116472545757e-05, + "loss": 0.0133, + "step": 208000 + }, + { + "epoch": 86.73, + "learning_rate": 1.1330407653910152e-05, + "loss": 0.0141, + "step": 208500 + }, + { + "epoch": 86.94, + "learning_rate": 1.130960898502496e-05, + "loss": 0.014, + "step": 209000 + }, + { + "epoch": 87.0, + "eval_cer": 0.2461, + "eval_gen_len": 13.6067, + "eval_loss": 0.42620450258255005, + "eval_runtime": 258.5687, + "eval_samples_per_second": 12.399, + "eval_steps_per_second": 3.102, + "step": 209148 + }, + { + "epoch": 87.15, + "learning_rate": 1.1288810316139769e-05, + "loss": 0.0131, + "step": 209500 + }, + { + "epoch": 87.35, + "learning_rate": 1.1268011647254578e-05, + "loss": 0.0121, + "step": 210000 + }, + { + "epoch": 87.56, + "learning_rate": 1.1247212978369386e-05, + "loss": 0.015, + "step": 210500 + }, + { + "epoch": 87.77, + "learning_rate": 1.1226414309484195e-05, + "loss": 0.0137, + "step": 211000 + }, + { + "epoch": 87.98, + "learning_rate": 1.1205615640599003e-05, + "loss": 0.0161, + "step": 211500 + }, + { + "epoch": 88.0, + "eval_cer": 0.249, + "eval_gen_len": 13.758, + "eval_loss": 0.4252397418022156, + "eval_runtime": 265.7747, + "eval_samples_per_second": 12.063, + "eval_steps_per_second": 3.018, + "step": 211552 + }, + { + "epoch": 88.19, + "learning_rate": 1.1184816971713812e-05, + "loss": 0.0126, + "step": 212000 + }, + { + "epoch": 88.39, + "learning_rate": 1.116405990016639e-05, + "loss": 0.0136, + "step": 212500 + }, + { + "epoch": 88.6, + "learning_rate": 1.1143302828618969e-05, + "loss": 0.0141, + "step": 213000 + }, + { + "epoch": 88.81, + "learning_rate": 1.1122504159733778e-05, + "loss": 0.0142, + "step": 213500 + }, + { + "epoch": 89.0, + "eval_cer": 0.2511, + "eval_gen_len": 13.8013, + "eval_loss": 0.43667590618133545, + "eval_runtime": 258.922, + "eval_samples_per_second": 12.382, + "eval_steps_per_second": 3.097, + "step": 213956 + }, + { + "epoch": 89.02, + "learning_rate": 1.1101705490848587e-05, + "loss": 0.0155, + "step": 214000 + }, + { + "epoch": 89.23, + "learning_rate": 1.1080906821963395e-05, + "loss": 0.0149, + "step": 214500 + }, + { + "epoch": 89.43, + "learning_rate": 1.1060108153078204e-05, + "loss": 0.0138, + "step": 215000 + }, + { + "epoch": 89.64, + "learning_rate": 1.1039309484193012e-05, + "loss": 0.0131, + "step": 215500 + }, + { + "epoch": 89.85, + "learning_rate": 1.1018510815307822e-05, + "loss": 0.0146, + "step": 216000 + }, + { + "epoch": 90.0, + "eval_cer": 0.2483, + "eval_gen_len": 13.6778, + "eval_loss": 0.41625672578811646, + "eval_runtime": 260.915, + "eval_samples_per_second": 12.288, + "eval_steps_per_second": 3.074, + "step": 216360 + }, + { + "epoch": 90.06, + "learning_rate": 1.099771214642263e-05, + "loss": 0.0131, + "step": 216500 + }, + { + "epoch": 90.27, + "learning_rate": 1.0976955074875209e-05, + "loss": 0.0128, + "step": 217000 + }, + { + "epoch": 90.47, + "learning_rate": 1.0956156405990017e-05, + "loss": 0.0139, + "step": 217500 + }, + { + "epoch": 90.68, + "learning_rate": 1.0935357737104826e-05, + "loss": 0.0146, + "step": 218000 + }, + { + "epoch": 90.89, + "learning_rate": 1.0914559068219634e-05, + "loss": 0.0127, + "step": 218500 + }, + { + "epoch": 91.0, + "eval_cer": 0.2466, + "eval_gen_len": 13.6344, + "eval_loss": 0.42400336265563965, + "eval_runtime": 259.4085, + "eval_samples_per_second": 12.359, + "eval_steps_per_second": 3.092, + "step": 218764 + }, + { + "epoch": 91.1, + "learning_rate": 1.0893843594009986e-05, + "loss": 0.0131, + "step": 219000 + }, + { + "epoch": 91.31, + "learning_rate": 1.0873044925124794e-05, + "loss": 0.0139, + "step": 219500 + }, + { + "epoch": 91.51, + "learning_rate": 1.0852246256239603e-05, + "loss": 0.0144, + "step": 220000 + }, + { + "epoch": 91.72, + "learning_rate": 1.083144758735441e-05, + "loss": 0.016, + "step": 220500 + }, + { + "epoch": 91.93, + "learning_rate": 1.081064891846922e-05, + "loss": 0.0147, + "step": 221000 + }, + { + "epoch": 92.0, + "eval_cer": 0.2457, + "eval_gen_len": 13.5948, + "eval_loss": 0.4094228148460388, + "eval_runtime": 257.4623, + "eval_samples_per_second": 12.452, + "eval_steps_per_second": 3.115, + "step": 221168 + }, + { + "epoch": 92.14, + "learning_rate": 1.078985024958403e-05, + "loss": 0.0136, + "step": 221500 + }, + { + "epoch": 92.35, + "learning_rate": 1.0769051580698837e-05, + "loss": 0.0123, + "step": 222000 + }, + { + "epoch": 92.55, + "learning_rate": 1.0748252911813646e-05, + "loss": 0.0113, + "step": 222500 + }, + { + "epoch": 92.76, + "learning_rate": 1.0727454242928454e-05, + "loss": 0.0153, + "step": 223000 + }, + { + "epoch": 92.97, + "learning_rate": 1.0706697171381034e-05, + "loss": 0.0153, + "step": 223500 + }, + { + "epoch": 93.0, + "eval_cer": 0.2414, + "eval_gen_len": 13.5168, + "eval_loss": 0.419572651386261, + "eval_runtime": 254.173, + "eval_samples_per_second": 12.613, + "eval_steps_per_second": 3.155, + "step": 223572 + }, + { + "epoch": 93.18, + "learning_rate": 1.0685898502495842e-05, + "loss": 0.0131, + "step": 224000 + }, + { + "epoch": 93.39, + "learning_rate": 1.0665099833610651e-05, + "loss": 0.0138, + "step": 224500 + }, + { + "epoch": 93.59, + "learning_rate": 1.0644301164725459e-05, + "loss": 0.0125, + "step": 225000 + }, + { + "epoch": 93.8, + "learning_rate": 1.0623544093178037e-05, + "loss": 0.0158, + "step": 225500 + }, + { + "epoch": 94.0, + "eval_cer": 0.2491, + "eval_gen_len": 13.8378, + "eval_loss": 0.4395461678504944, + "eval_runtime": 257.6708, + "eval_samples_per_second": 12.442, + "eval_steps_per_second": 3.112, + "step": 225976 + }, + { + "epoch": 94.01, + "learning_rate": 1.0602745424292846e-05, + "loss": 0.0141, + "step": 226000 + }, + { + "epoch": 94.22, + "learning_rate": 1.0581946755407656e-05, + "loss": 0.0147, + "step": 226500 + }, + { + "epoch": 94.43, + "learning_rate": 1.0561148086522463e-05, + "loss": 0.0119, + "step": 227000 + }, + { + "epoch": 94.63, + "learning_rate": 1.0540391014975041e-05, + "loss": 0.0139, + "step": 227500 + }, + { + "epoch": 94.84, + "learning_rate": 1.0519633943427621e-05, + "loss": 0.0138, + "step": 228000 + }, + { + "epoch": 95.0, + "eval_cer": 0.2518, + "eval_gen_len": 13.7695, + "eval_loss": 0.43901219964027405, + "eval_runtime": 262.2107, + "eval_samples_per_second": 12.227, + "eval_steps_per_second": 3.059, + "step": 228380 + }, + { + "epoch": 95.05, + "learning_rate": 1.0498835274542429e-05, + "loss": 0.0121, + "step": 228500 + }, + { + "epoch": 95.26, + "learning_rate": 1.0478036605657238e-05, + "loss": 0.0106, + "step": 229000 + }, + { + "epoch": 95.47, + "learning_rate": 1.0457237936772046e-05, + "loss": 0.0126, + "step": 229500 + }, + { + "epoch": 95.67, + "learning_rate": 1.0436439267886855e-05, + "loss": 0.0156, + "step": 230000 + }, + { + "epoch": 95.88, + "learning_rate": 1.0415640599001665e-05, + "loss": 0.0123, + "step": 230500 + }, + { + "epoch": 96.0, + "eval_cer": 0.2475, + "eval_gen_len": 13.69, + "eval_loss": 0.4405384361743927, + "eval_runtime": 260.5068, + "eval_samples_per_second": 12.307, + "eval_steps_per_second": 3.079, + "step": 230784 + }, + { + "epoch": 96.09, + "learning_rate": 1.0394841930116472e-05, + "loss": 0.011, + "step": 231000 + }, + { + "epoch": 96.3, + "learning_rate": 1.0374084858569054e-05, + "loss": 0.0132, + "step": 231500 + }, + { + "epoch": 96.51, + "learning_rate": 1.0353286189683862e-05, + "loss": 0.0135, + "step": 232000 + }, + { + "epoch": 96.71, + "learning_rate": 1.0332487520798671e-05, + "loss": 0.0143, + "step": 232500 + }, + { + "epoch": 96.92, + "learning_rate": 1.0311688851913479e-05, + "loss": 0.0136, + "step": 233000 + }, + { + "epoch": 97.0, + "eval_cer": 0.2492, + "eval_gen_len": 13.791, + "eval_loss": 0.4154476225376129, + "eval_runtime": 266.6041, + "eval_samples_per_second": 12.025, + "eval_steps_per_second": 3.008, + "step": 233188 + }, + { + "epoch": 97.13, + "learning_rate": 1.0290890183028288e-05, + "loss": 0.0117, + "step": 233500 + }, + { + "epoch": 97.34, + "learning_rate": 1.0270091514143094e-05, + "loss": 0.0116, + "step": 234000 + }, + { + "epoch": 97.55, + "learning_rate": 1.0249292845257903e-05, + "loss": 0.0117, + "step": 234500 + }, + { + "epoch": 97.75, + "learning_rate": 1.0228494176372713e-05, + "loss": 0.0158, + "step": 235000 + }, + { + "epoch": 97.96, + "learning_rate": 1.0207737104825293e-05, + "loss": 0.012, + "step": 235500 + }, + { + "epoch": 98.0, + "eval_cer": 0.2481, + "eval_gen_len": 13.8702, + "eval_loss": 0.43725699186325073, + "eval_runtime": 257.4309, + "eval_samples_per_second": 12.454, + "eval_steps_per_second": 3.115, + "step": 235592 + }, + { + "epoch": 98.17, + "learning_rate": 1.0186938435940102e-05, + "loss": 0.0098, + "step": 236000 + }, + { + "epoch": 98.38, + "learning_rate": 1.016618136439268e-05, + "loss": 0.0118, + "step": 236500 + }, + { + "epoch": 98.59, + "learning_rate": 1.0145382695507488e-05, + "loss": 0.0128, + "step": 237000 + }, + { + "epoch": 98.79, + "learning_rate": 1.0124584026622297e-05, + "loss": 0.0122, + "step": 237500 + }, + { + "epoch": 99.0, + "eval_cer": 0.2504, + "eval_gen_len": 13.8051, + "eval_loss": 0.44157010316848755, + "eval_runtime": 284.8185, + "eval_samples_per_second": 11.256, + "eval_steps_per_second": 2.816, + "step": 237996 + }, + { + "epoch": 99.0, + "learning_rate": 1.0103785357737107e-05, + "loss": 0.0136, + "step": 238000 + }, + { + "epoch": 99.21, + "learning_rate": 1.0082986688851914e-05, + "loss": 0.0115, + "step": 238500 + }, + { + "epoch": 99.42, + "learning_rate": 1.0062188019966724e-05, + "loss": 0.0109, + "step": 239000 + }, + { + "epoch": 99.63, + "learning_rate": 1.0041389351081531e-05, + "loss": 0.0135, + "step": 239500 + }, + { + "epoch": 99.83, + "learning_rate": 1.002059068219634e-05, + "loss": 0.0146, + "step": 240000 + }, + { + "epoch": 100.0, + "eval_cer": 0.2489, + "eval_gen_len": 13.6868, + "eval_loss": 0.42966365814208984, + "eval_runtime": 282.168, + "eval_samples_per_second": 11.362, + "eval_steps_per_second": 2.842, + "step": 240400 + }, + { + "epoch": 100.04, + "learning_rate": 9.99979201331115e-06, + "loss": 0.0136, + "step": 240500 + }, + { + "epoch": 100.25, + "learning_rate": 9.979034941763728e-06, + "loss": 0.0132, + "step": 241000 + }, + { + "epoch": 100.46, + "learning_rate": 9.958236272878536e-06, + "loss": 0.0112, + "step": 241500 + }, + { + "epoch": 100.67, + "learning_rate": 9.937437603993345e-06, + "loss": 0.0139, + "step": 242000 + }, + { + "epoch": 100.87, + "learning_rate": 9.916638935108155e-06, + "loss": 0.0135, + "step": 242500 + }, + { + "epoch": 101.0, + "eval_cer": 0.2428, + "eval_gen_len": 13.5689, + "eval_loss": 0.4310346245765686, + "eval_runtime": 280.207, + "eval_samples_per_second": 11.442, + "eval_steps_per_second": 2.862, + "step": 242804 + }, + { + "epoch": 101.08, + "learning_rate": 9.895881863560733e-06, + "loss": 0.0118, + "step": 243000 + }, + { + "epoch": 101.29, + "learning_rate": 9.875083194675542e-06, + "loss": 0.0111, + "step": 243500 + }, + { + "epoch": 101.5, + "learning_rate": 9.854284525790352e-06, + "loss": 0.0115, + "step": 244000 + }, + { + "epoch": 101.71, + "learning_rate": 9.83352745424293e-06, + "loss": 0.0124, + "step": 244500 + }, + { + "epoch": 101.91, + "learning_rate": 9.812728785357737e-06, + "loss": 0.0136, + "step": 245000 + }, + { + "epoch": 102.0, + "eval_cer": 0.246, + "eval_gen_len": 13.4972, + "eval_loss": 0.422376424074173, + "eval_runtime": 269.57, + "eval_samples_per_second": 11.893, + "eval_steps_per_second": 2.975, + "step": 245208 + }, + { + "epoch": 102.12, + "learning_rate": 9.791930116472547e-06, + "loss": 0.0124, + "step": 245500 + }, + { + "epoch": 102.33, + "learning_rate": 9.771131447587356e-06, + "loss": 0.0101, + "step": 246000 + }, + { + "epoch": 102.54, + "learning_rate": 9.750332778702164e-06, + "loss": 0.0109, + "step": 246500 + }, + { + "epoch": 102.75, + "learning_rate": 9.729534109816973e-06, + "loss": 0.0108, + "step": 247000 + }, + { + "epoch": 102.95, + "learning_rate": 9.70873544093178e-06, + "loss": 0.014, + "step": 247500 + }, + { + "epoch": 103.0, + "eval_cer": 0.2502, + "eval_gen_len": 13.8394, + "eval_loss": 0.45676541328430176, + "eval_runtime": 270.1526, + "eval_samples_per_second": 11.867, + "eval_steps_per_second": 2.969, + "step": 247612 + }, + { + "epoch": 103.16, + "learning_rate": 9.68793677204659e-06, + "loss": 0.0106, + "step": 248000 + }, + { + "epoch": 103.37, + "learning_rate": 9.667179700499168e-06, + "loss": 0.0117, + "step": 248500 + }, + { + "epoch": 103.58, + "learning_rate": 9.646381031613978e-06, + "loss": 0.0114, + "step": 249000 + }, + { + "epoch": 103.79, + "learning_rate": 9.625582362728785e-06, + "loss": 0.0135, + "step": 249500 + }, + { + "epoch": 103.99, + "learning_rate": 9.604783693843595e-06, + "loss": 0.0125, + "step": 250000 + }, + { + "epoch": 104.0, + "eval_cer": 0.2465, + "eval_gen_len": 13.4345, + "eval_loss": 0.39919513463974, + "eval_runtime": 270.6771, + "eval_samples_per_second": 11.844, + "eval_steps_per_second": 2.963, + "step": 250016 + }, + { + "epoch": 104.2, + "learning_rate": 9.583985024958402e-06, + "loss": 0.01, + "step": 250500 + }, + { + "epoch": 104.41, + "learning_rate": 9.563227953410982e-06, + "loss": 0.0126, + "step": 251000 + }, + { + "epoch": 104.62, + "learning_rate": 9.542429284525792e-06, + "loss": 0.0109, + "step": 251500 + }, + { + "epoch": 104.83, + "learning_rate": 9.5216306156406e-06, + "loss": 0.0135, + "step": 252000 + }, + { + "epoch": 105.0, + "eval_cer": 0.2472, + "eval_gen_len": 13.7277, + "eval_loss": 0.44164207577705383, + "eval_runtime": 274.8419, + "eval_samples_per_second": 11.665, + "eval_steps_per_second": 2.918, + "step": 252420 + }, + { + "epoch": 105.03, + "learning_rate": 9.500831946755409e-06, + "loss": 0.0119, + "step": 252500 + }, + { + "epoch": 105.24, + "learning_rate": 9.480033277870218e-06, + "loss": 0.0095, + "step": 253000 + }, + { + "epoch": 105.45, + "learning_rate": 9.459276206322796e-06, + "loss": 0.0115, + "step": 253500 + }, + { + "epoch": 105.66, + "learning_rate": 9.438477537437604e-06, + "loss": 0.0135, + "step": 254000 + }, + { + "epoch": 105.87, + "learning_rate": 9.417678868552413e-06, + "loss": 0.012, + "step": 254500 + }, + { + "epoch": 106.0, + "eval_cer": 0.2416, + "eval_gen_len": 13.4994, + "eval_loss": 0.41192150115966797, + "eval_runtime": 275.6326, + "eval_samples_per_second": 11.631, + "eval_steps_per_second": 2.91, + "step": 254824 + }, + { + "epoch": 106.07, + "learning_rate": 9.396880199667223e-06, + "loss": 0.0114, + "step": 255000 + }, + { + "epoch": 106.28, + "learning_rate": 9.376123128119801e-06, + "loss": 0.0116, + "step": 255500 + }, + { + "epoch": 106.49, + "learning_rate": 9.355324459234609e-06, + "loss": 0.0093, + "step": 256000 + }, + { + "epoch": 106.7, + "learning_rate": 9.334525790349418e-06, + "loss": 0.0128, + "step": 256500 + }, + { + "epoch": 106.91, + "learning_rate": 9.313768718801998e-06, + "loss": 0.0133, + "step": 257000 + }, + { + "epoch": 107.0, + "eval_cer": 0.2476, + "eval_gen_len": 13.6494, + "eval_loss": 0.42318016290664673, + "eval_runtime": 280.8295, + "eval_samples_per_second": 11.416, + "eval_steps_per_second": 2.856, + "step": 257228 + }, + { + "epoch": 107.11, + "learning_rate": 9.292970049916805e-06, + "loss": 0.0097, + "step": 257500 + }, + { + "epoch": 107.32, + "learning_rate": 9.272171381031615e-06, + "loss": 0.012, + "step": 258000 + }, + { + "epoch": 107.53, + "learning_rate": 9.251372712146424e-06, + "loss": 0.0093, + "step": 258500 + }, + { + "epoch": 107.74, + "learning_rate": 9.230574043261232e-06, + "loss": 0.0103, + "step": 259000 + }, + { + "epoch": 107.95, + "learning_rate": 9.209775374376041e-06, + "loss": 0.0103, + "step": 259500 + }, + { + "epoch": 108.0, + "eval_cer": 0.2434, + "eval_gen_len": 13.4925, + "eval_loss": 0.425822377204895, + "eval_runtime": 284.9328, + "eval_samples_per_second": 11.252, + "eval_steps_per_second": 2.815, + "step": 259632 + }, + { + "epoch": 108.15, + "learning_rate": 9.188976705490849e-06, + "loss": 0.0108, + "step": 260000 + }, + { + "epoch": 108.36, + "learning_rate": 9.168178036605658e-06, + "loss": 0.0109, + "step": 260500 + }, + { + "epoch": 108.57, + "learning_rate": 9.147420965058236e-06, + "loss": 0.0125, + "step": 261000 + }, + { + "epoch": 108.78, + "learning_rate": 9.126622296173046e-06, + "loss": 0.0112, + "step": 261500 + }, + { + "epoch": 108.99, + "learning_rate": 9.105823627287854e-06, + "loss": 0.0116, + "step": 262000 + }, + { + "epoch": 109.0, + "eval_cer": 0.2462, + "eval_gen_len": 13.7115, + "eval_loss": 0.43990305066108704, + "eval_runtime": 285.2346, + "eval_samples_per_second": 11.24, + "eval_steps_per_second": 2.812, + "step": 262036 + }, + { + "epoch": 109.19, + "learning_rate": 9.085066555740433e-06, + "loss": 0.0088, + "step": 262500 + }, + { + "epoch": 109.4, + "learning_rate": 9.064267886855243e-06, + "loss": 0.0098, + "step": 263000 + }, + { + "epoch": 109.61, + "learning_rate": 9.04346921797005e-06, + "loss": 0.0106, + "step": 263500 + }, + { + "epoch": 109.82, + "learning_rate": 9.02267054908486e-06, + "loss": 0.0123, + "step": 264000 + }, + { + "epoch": 110.0, + "eval_cer": 0.2462, + "eval_gen_len": 13.6023, + "eval_loss": 0.41700801253318787, + "eval_runtime": 287.8449, + "eval_samples_per_second": 11.138, + "eval_steps_per_second": 2.786, + "step": 264440 + }, + { + "epoch": 110.02, + "learning_rate": 9.00187188019967e-06, + "loss": 0.0125, + "step": 264500 + }, + { + "epoch": 110.23, + "learning_rate": 8.981073211314477e-06, + "loss": 0.01, + "step": 265000 + }, + { + "epoch": 110.44, + "learning_rate": 8.960316139767055e-06, + "loss": 0.0099, + "step": 265500 + }, + { + "epoch": 110.65, + "learning_rate": 8.939517470881864e-06, + "loss": 0.0108, + "step": 266000 + }, + { + "epoch": 110.86, + "learning_rate": 8.918718801996674e-06, + "loss": 0.0109, + "step": 266500 + }, + { + "epoch": 111.0, + "eval_cer": 0.2476, + "eval_gen_len": 13.6859, + "eval_loss": 0.4497167766094208, + "eval_runtime": 326.8835, + "eval_samples_per_second": 9.808, + "eval_steps_per_second": 2.453, + "step": 266844 + }, + { + "epoch": 111.06, + "learning_rate": 8.897920133111481e-06, + "loss": 0.0106, + "step": 267000 + }, + { + "epoch": 111.27, + "learning_rate": 8.87712146422629e-06, + "loss": 0.0103, + "step": 267500 + }, + { + "epoch": 111.48, + "learning_rate": 8.856322795341098e-06, + "loss": 0.0104, + "step": 268000 + }, + { + "epoch": 111.69, + "learning_rate": 8.835524126455908e-06, + "loss": 0.0126, + "step": 268500 + }, + { + "epoch": 111.9, + "learning_rate": 8.814725457570717e-06, + "loss": 0.0115, + "step": 269000 + }, + { + "epoch": 112.0, + "eval_cer": 0.2528, + "eval_gen_len": 13.9145, + "eval_loss": 0.4540727734565735, + "eval_runtime": 317.49, + "eval_samples_per_second": 10.098, + "eval_steps_per_second": 2.526, + "step": 269248 + }, + { + "epoch": 112.1, + "learning_rate": 8.794009983361066e-06, + "loss": 0.0108, + "step": 269500 + }, + { + "epoch": 112.31, + "learning_rate": 8.773211314475875e-06, + "loss": 0.0092, + "step": 270000 + }, + { + "epoch": 112.52, + "learning_rate": 8.752412645590683e-06, + "loss": 0.0104, + "step": 270500 + }, + { + "epoch": 112.73, + "learning_rate": 8.731613976705492e-06, + "loss": 0.0091, + "step": 271000 + }, + { + "epoch": 112.94, + "learning_rate": 8.7108153078203e-06, + "loss": 0.0115, + "step": 271500 + }, + { + "epoch": 113.0, + "eval_cer": 0.2449, + "eval_gen_len": 13.4545, + "eval_loss": 0.4440736770629883, + "eval_runtime": 280.5689, + "eval_samples_per_second": 11.427, + "eval_steps_per_second": 2.858, + "step": 271652 + }, + { + "epoch": 113.14, + "learning_rate": 8.69001663893511e-06, + "loss": 0.0092, + "step": 272000 + }, + { + "epoch": 113.35, + "learning_rate": 8.669217970049919e-06, + "loss": 0.0092, + "step": 272500 + }, + { + "epoch": 113.56, + "learning_rate": 8.648419301164726e-06, + "loss": 0.0109, + "step": 273000 + }, + { + "epoch": 113.77, + "learning_rate": 8.627620632279536e-06, + "loss": 0.0098, + "step": 273500 + }, + { + "epoch": 113.98, + "learning_rate": 8.606821963394343e-06, + "loss": 0.0113, + "step": 274000 + }, + { + "epoch": 114.0, + "eval_cer": 0.2471, + "eval_gen_len": 13.5477, + "eval_loss": 0.4469629228115082, + "eval_runtime": 276.6766, + "eval_samples_per_second": 11.588, + "eval_steps_per_second": 2.899, + "step": 274056 + }, + { + "epoch": 114.18, + "learning_rate": 8.586064891846923e-06, + "loss": 0.0115, + "step": 274500 + }, + { + "epoch": 114.39, + "learning_rate": 8.565266222961731e-06, + "loss": 0.0077, + "step": 275000 + }, + { + "epoch": 114.6, + "learning_rate": 8.54446755407654e-06, + "loss": 0.0103, + "step": 275500 + }, + { + "epoch": 114.81, + "learning_rate": 8.523668885191348e-06, + "loss": 0.01, + "step": 276000 + }, + { + "epoch": 115.0, + "eval_cer": 0.2537, + "eval_gen_len": 13.9704, + "eval_loss": 0.47477516531944275, + "eval_runtime": 291.6272, + "eval_samples_per_second": 10.993, + "eval_steps_per_second": 2.75, + "step": 276460 + }, + { + "epoch": 115.02, + "learning_rate": 8.502911813643926e-06, + "loss": 0.0116, + "step": 276500 + }, + { + "epoch": 115.22, + "learning_rate": 8.482113144758736e-06, + "loss": 0.0102, + "step": 277000 + }, + { + "epoch": 115.43, + "learning_rate": 8.461314475873545e-06, + "loss": 0.0085, + "step": 277500 + }, + { + "epoch": 115.64, + "learning_rate": 8.440515806988353e-06, + "loss": 0.0104, + "step": 278000 + }, + { + "epoch": 115.85, + "learning_rate": 8.419758735440932e-06, + "loss": 0.0125, + "step": 278500 + }, + { + "epoch": 116.0, + "eval_cer": 0.2438, + "eval_gen_len": 13.4816, + "eval_loss": 0.4395754933357239, + "eval_runtime": 273.4039, + "eval_samples_per_second": 11.726, + "eval_steps_per_second": 2.933, + "step": 278864 + }, + { + "epoch": 116.06, + "learning_rate": 8.398960066555742e-06, + "loss": 0.0093, + "step": 279000 + }, + { + "epoch": 116.26, + "learning_rate": 8.37816139767055e-06, + "loss": 0.01, + "step": 279500 + }, + { + "epoch": 116.47, + "learning_rate": 8.357362728785359e-06, + "loss": 0.0087, + "step": 280000 + }, + { + "epoch": 116.68, + "learning_rate": 8.336564059900167e-06, + "loss": 0.0099, + "step": 280500 + }, + { + "epoch": 116.89, + "learning_rate": 8.315765391014976e-06, + "loss": 0.0101, + "step": 281000 + }, + { + "epoch": 117.0, + "eval_cer": 0.2464, + "eval_gen_len": 13.7723, + "eval_loss": 0.44487103819847107, + "eval_runtime": 278.0443, + "eval_samples_per_second": 11.531, + "eval_steps_per_second": 2.884, + "step": 281268 + }, + { + "epoch": 117.1, + "learning_rate": 8.295008319467554e-06, + "loss": 0.0094, + "step": 281500 + }, + { + "epoch": 117.3, + "learning_rate": 8.274209650582364e-06, + "loss": 0.0077, + "step": 282000 + }, + { + "epoch": 117.51, + "learning_rate": 8.253410981697171e-06, + "loss": 0.01, + "step": 282500 + }, + { + "epoch": 117.72, + "learning_rate": 8.23261231281198e-06, + "loss": 0.0113, + "step": 283000 + }, + { + "epoch": 117.93, + "learning_rate": 8.21181364392679e-06, + "loss": 0.0108, + "step": 283500 + }, + { + "epoch": 118.0, + "eval_cer": 0.249, + "eval_gen_len": 13.7711, + "eval_loss": 0.4563674330711365, + "eval_runtime": 286.5388, + "eval_samples_per_second": 11.189, + "eval_steps_per_second": 2.799, + "step": 283672 + }, + { + "epoch": 118.14, + "learning_rate": 8.191014975041598e-06, + "loss": 0.0093, + "step": 284000 + }, + { + "epoch": 118.34, + "learning_rate": 8.170216306156407e-06, + "loss": 0.0097, + "step": 284500 + }, + { + "epoch": 118.55, + "learning_rate": 8.149459234608985e-06, + "loss": 0.0088, + "step": 285000 + }, + { + "epoch": 118.76, + "learning_rate": 8.128660565723795e-06, + "loss": 0.0087, + "step": 285500 + }, + { + "epoch": 118.97, + "learning_rate": 8.107861896838602e-06, + "loss": 0.0121, + "step": 286000 + }, + { + "epoch": 119.0, + "eval_cer": 0.2484, + "eval_gen_len": 13.7848, + "eval_loss": 0.4589692950248718, + "eval_runtime": 285.9698, + "eval_samples_per_second": 11.211, + "eval_steps_per_second": 2.804, + "step": 286076 + }, + { + "epoch": 119.18, + "learning_rate": 8.087063227953412e-06, + "loss": 0.0092, + "step": 286500 + }, + { + "epoch": 119.38, + "learning_rate": 8.06626455906822e-06, + "loss": 0.0104, + "step": 287000 + }, + { + "epoch": 119.59, + "learning_rate": 8.045465890183029e-06, + "loss": 0.0094, + "step": 287500 + }, + { + "epoch": 119.8, + "learning_rate": 8.024667221297838e-06, + "loss": 0.0111, + "step": 288000 + }, + { + "epoch": 120.0, + "eval_cer": 0.2488, + "eval_gen_len": 13.8621, + "eval_loss": 0.46863117814064026, + "eval_runtime": 282.0826, + "eval_samples_per_second": 11.365, + "eval_steps_per_second": 2.843, + "step": 288480 + }, + { + "epoch": 120.01, + "learning_rate": 8.003868552412646e-06, + "loss": 0.0105, + "step": 288500 + }, + { + "epoch": 120.22, + "learning_rate": 7.983111480865226e-06, + "loss": 0.0086, + "step": 289000 + }, + { + "epoch": 120.42, + "learning_rate": 7.962312811980035e-06, + "loss": 0.0101, + "step": 289500 + }, + { + "epoch": 120.63, + "learning_rate": 7.941514143094843e-06, + "loss": 0.0103, + "step": 290000 + }, + { + "epoch": 120.84, + "learning_rate": 7.920715474209652e-06, + "loss": 0.009, + "step": 290500 + }, + { + "epoch": 121.0, + "eval_cer": 0.2451, + "eval_gen_len": 13.7648, + "eval_loss": 0.44192788004875183, + "eval_runtime": 276.6356, + "eval_samples_per_second": 11.589, + "eval_steps_per_second": 2.899, + "step": 290884 + }, + { + "epoch": 121.05, + "learning_rate": 7.89991680532446e-06, + "loss": 0.0103, + "step": 291000 + }, + { + "epoch": 121.26, + "learning_rate": 7.87915973377704e-06, + "loss": 0.0109, + "step": 291500 + }, + { + "epoch": 121.46, + "learning_rate": 7.858361064891847e-06, + "loss": 0.0096, + "step": 292000 + }, + { + "epoch": 121.67, + "learning_rate": 7.837603993344427e-06, + "loss": 0.0104, + "step": 292500 + }, + { + "epoch": 121.88, + "learning_rate": 7.816805324459236e-06, + "loss": 0.0093, + "step": 293000 + }, + { + "epoch": 122.0, + "eval_cer": 0.242, + "eval_gen_len": 13.6089, + "eval_loss": 0.43688440322875977, + "eval_runtime": 279.7745, + "eval_samples_per_second": 11.459, + "eval_steps_per_second": 2.867, + "step": 293288 + }, + { + "epoch": 122.09, + "learning_rate": 7.796006655574044e-06, + "loss": 0.0096, + "step": 293500 + }, + { + "epoch": 122.3, + "learning_rate": 7.775207986688853e-06, + "loss": 0.0083, + "step": 294000 + }, + { + "epoch": 122.5, + "learning_rate": 7.754409317803661e-06, + "loss": 0.0092, + "step": 294500 + }, + { + "epoch": 122.71, + "learning_rate": 7.73361064891847e-06, + "loss": 0.0119, + "step": 295000 + }, + { + "epoch": 122.92, + "learning_rate": 7.71281198003328e-06, + "loss": 0.0092, + "step": 295500 + }, + { + "epoch": 123.0, + "eval_cer": 0.2475, + "eval_gen_len": 13.6304, + "eval_loss": 0.4489113390445709, + "eval_runtime": 292.1033, + "eval_samples_per_second": 10.976, + "eval_steps_per_second": 2.746, + "step": 295692 + }, + { + "epoch": 123.13, + "learning_rate": 7.692054908485858e-06, + "loss": 0.0096, + "step": 296000 + }, + { + "epoch": 123.34, + "learning_rate": 7.671256239600666e-06, + "loss": 0.0093, + "step": 296500 + }, + { + "epoch": 123.54, + "learning_rate": 7.650457570715475e-06, + "loss": 0.0094, + "step": 297000 + }, + { + "epoch": 123.75, + "learning_rate": 7.629658901830283e-06, + "loss": 0.0111, + "step": 297500 + }, + { + "epoch": 123.96, + "learning_rate": 7.608860232945092e-06, + "loss": 0.0099, + "step": 298000 + }, + { + "epoch": 124.0, + "eval_cer": 0.2447, + "eval_gen_len": 13.6413, + "eval_loss": 0.45137402415275574, + "eval_runtime": 273.6913, + "eval_samples_per_second": 11.714, + "eval_steps_per_second": 2.93, + "step": 298096 + }, + { + "epoch": 124.17, + "learning_rate": 7.588061564059901e-06, + "loss": 0.0078, + "step": 298500 + }, + { + "epoch": 124.38, + "learning_rate": 7.56726289517471e-06, + "loss": 0.0093, + "step": 299000 + }, + { + "epoch": 124.58, + "learning_rate": 7.5464642262895185e-06, + "loss": 0.0089, + "step": 299500 + }, + { + "epoch": 124.79, + "learning_rate": 7.525707154742097e-06, + "loss": 0.01, + "step": 300000 + }, + { + "epoch": 125.0, + "learning_rate": 7.5049500831946766e-06, + "loss": 0.0103, + "step": 300500 + }, + { + "epoch": 125.0, + "eval_cer": 0.2467, + "eval_gen_len": 13.8041, + "eval_loss": 0.45419880747795105, + "eval_runtime": 277.7542, + "eval_samples_per_second": 11.543, + "eval_steps_per_second": 2.887, + "step": 300500 + }, + { + "epoch": 125.21, + "learning_rate": 7.484151414309485e-06, + "loss": 0.0076, + "step": 301000 + }, + { + "epoch": 125.42, + "learning_rate": 7.463352745424294e-06, + "loss": 0.0089, + "step": 301500 + }, + { + "epoch": 125.62, + "learning_rate": 7.442554076539102e-06, + "loss": 0.0089, + "step": 302000 + }, + { + "epoch": 125.83, + "learning_rate": 7.4217554076539115e-06, + "loss": 0.0121, + "step": 302500 + }, + { + "epoch": 126.0, + "eval_cer": 0.2496, + "eval_gen_len": 13.8525, + "eval_loss": 0.4686892330646515, + "eval_runtime": 288.7714, + "eval_samples_per_second": 11.102, + "eval_steps_per_second": 2.777, + "step": 302904 + }, + { + "epoch": 126.04, + "learning_rate": 7.40095673876872e-06, + "loss": 0.0091, + "step": 303000 + }, + { + "epoch": 126.25, + "learning_rate": 7.3801580698835285e-06, + "loss": 0.0089, + "step": 303500 + }, + { + "epoch": 126.46, + "learning_rate": 7.359359400998337e-06, + "loss": 0.0092, + "step": 304000 + }, + { + "epoch": 126.66, + "learning_rate": 7.338560732113146e-06, + "loss": 0.0081, + "step": 304500 + }, + { + "epoch": 126.87, + "learning_rate": 7.317845257903495e-06, + "loss": 0.0116, + "step": 305000 + }, + { + "epoch": 127.0, + "eval_cer": 0.2443, + "eval_gen_len": 13.6432, + "eval_loss": 0.4484730660915375, + "eval_runtime": 273.2803, + "eval_samples_per_second": 11.732, + "eval_steps_per_second": 2.935, + "step": 305308 + }, + { + "epoch": 127.08, + "learning_rate": 7.297046589018303e-06, + "loss": 0.0077, + "step": 305500 + }, + { + "epoch": 127.29, + "learning_rate": 7.276247920133111e-06, + "loss": 0.009, + "step": 306000 + }, + { + "epoch": 127.5, + "learning_rate": 7.25544925124792e-06, + "loss": 0.0094, + "step": 306500 + }, + { + "epoch": 127.7, + "learning_rate": 7.234650582362729e-06, + "loss": 0.0081, + "step": 307000 + }, + { + "epoch": 127.91, + "learning_rate": 7.213851913477538e-06, + "loss": 0.0105, + "step": 307500 + }, + { + "epoch": 128.0, + "eval_cer": 0.2437, + "eval_gen_len": 13.7661, + "eval_loss": 0.4494189918041229, + "eval_runtime": 282.0309, + "eval_samples_per_second": 11.368, + "eval_steps_per_second": 2.844, + "step": 307712 + }, + { + "epoch": 128.12, + "learning_rate": 7.193094841930118e-06, + "loss": 0.0085, + "step": 308000 + }, + { + "epoch": 128.33, + "learning_rate": 7.172296173044926e-06, + "loss": 0.0086, + "step": 308500 + }, + { + "epoch": 128.54, + "learning_rate": 7.151497504159735e-06, + "loss": 0.0091, + "step": 309000 + }, + { + "epoch": 128.74, + "learning_rate": 7.130698835274543e-06, + "loss": 0.0094, + "step": 309500 + }, + { + "epoch": 128.95, + "learning_rate": 7.109900166389352e-06, + "loss": 0.0087, + "step": 310000 + }, + { + "epoch": 129.0, + "eval_cer": 0.2465, + "eval_gen_len": 13.5352, + "eval_loss": 0.46537643671035767, + "eval_runtime": 272.0294, + "eval_samples_per_second": 11.785, + "eval_steps_per_second": 2.948, + "step": 310116 + }, + { + "epoch": 129.16, + "learning_rate": 7.08910149750416e-06, + "loss": 0.0088, + "step": 310500 + }, + { + "epoch": 129.37, + "learning_rate": 7.0683028286189696e-06, + "loss": 0.0076, + "step": 311000 + }, + { + "epoch": 129.58, + "learning_rate": 7.047504159733778e-06, + "loss": 0.0088, + "step": 311500 + }, + { + "epoch": 129.78, + "learning_rate": 7.026705490848587e-06, + "loss": 0.0092, + "step": 312000 + }, + { + "epoch": 129.99, + "learning_rate": 7.005948419301165e-06, + "loss": 0.0106, + "step": 312500 + }, + { + "epoch": 130.0, + "eval_cer": 0.2457, + "eval_gen_len": 13.5667, + "eval_loss": 0.44364768266677856, + "eval_runtime": 269.0291, + "eval_samples_per_second": 11.917, + "eval_steps_per_second": 2.981, + "step": 312520 + }, + { + "epoch": 130.2, + "learning_rate": 6.985149750415974e-06, + "loss": 0.008, + "step": 313000 + }, + { + "epoch": 130.41, + "learning_rate": 6.964351081530783e-06, + "loss": 0.0072, + "step": 313500 + }, + { + "epoch": 130.62, + "learning_rate": 6.943552412645591e-06, + "loss": 0.0091, + "step": 314000 + }, + { + "epoch": 130.82, + "learning_rate": 6.9227537437604e-06, + "loss": 0.0087, + "step": 314500 + }, + { + "epoch": 131.0, + "eval_cer": 0.2451, + "eval_gen_len": 13.7358, + "eval_loss": 0.4612971544265747, + "eval_runtime": 274.1913, + "eval_samples_per_second": 11.693, + "eval_steps_per_second": 2.925, + "step": 314924 + }, + { + "epoch": 131.03, + "learning_rate": 6.90199667221298e-06, + "loss": 0.0107, + "step": 315000 + }, + { + "epoch": 131.24, + "learning_rate": 6.881239600665558e-06, + "loss": 0.0082, + "step": 315500 + }, + { + "epoch": 131.45, + "learning_rate": 6.860440931780366e-06, + "loss": 0.0089, + "step": 316000 + }, + { + "epoch": 131.66, + "learning_rate": 6.839642262895176e-06, + "loss": 0.009, + "step": 316500 + }, + { + "epoch": 131.86, + "learning_rate": 6.818843594009984e-06, + "loss": 0.0104, + "step": 317000 + }, + { + "epoch": 132.0, + "eval_cer": 0.2468, + "eval_gen_len": 13.5936, + "eval_loss": 0.4653697907924652, + "eval_runtime": 277.9546, + "eval_samples_per_second": 11.534, + "eval_steps_per_second": 2.885, + "step": 317328 + }, + { + "epoch": 132.07, + "learning_rate": 6.798044925124793e-06, + "loss": 0.0094, + "step": 317500 + }, + { + "epoch": 132.28, + "learning_rate": 6.777246256239601e-06, + "loss": 0.0079, + "step": 318000 + }, + { + "epoch": 132.49, + "learning_rate": 6.75644758735441e-06, + "loss": 0.0098, + "step": 318500 + }, + { + "epoch": 132.7, + "learning_rate": 6.735648918469218e-06, + "loss": 0.0081, + "step": 319000 + }, + { + "epoch": 132.9, + "learning_rate": 6.714891846921797e-06, + "loss": 0.0089, + "step": 319500 + }, + { + "epoch": 133.0, + "eval_cer": 0.2455, + "eval_gen_len": 13.6875, + "eval_loss": 0.45620593428611755, + "eval_runtime": 284.3664, + "eval_samples_per_second": 11.274, + "eval_steps_per_second": 2.82, + "step": 319732 + }, + { + "epoch": 133.11, + "learning_rate": 6.694093178036606e-06, + "loss": 0.0085, + "step": 320000 + }, + { + "epoch": 133.32, + "learning_rate": 6.673294509151414e-06, + "loss": 0.0091, + "step": 320500 + }, + { + "epoch": 133.53, + "learning_rate": 6.652495840266223e-06, + "loss": 0.0073, + "step": 321000 + }, + { + "epoch": 133.74, + "learning_rate": 6.631738768718803e-06, + "loss": 0.0091, + "step": 321500 + }, + { + "epoch": 133.94, + "learning_rate": 6.610940099833611e-06, + "loss": 0.0088, + "step": 322000 + }, + { + "epoch": 134.0, + "eval_cer": 0.2455, + "eval_gen_len": 13.6653, + "eval_loss": 0.4463290274143219, + "eval_runtime": 265.7595, + "eval_samples_per_second": 12.064, + "eval_steps_per_second": 3.018, + "step": 322136 + }, + { + "epoch": 134.15, + "learning_rate": 6.59014143094842e-06, + "loss": 0.0082, + "step": 322500 + }, + { + "epoch": 134.36, + "learning_rate": 6.569342762063229e-06, + "loss": 0.0087, + "step": 323000 + }, + { + "epoch": 134.57, + "learning_rate": 6.548544093178038e-06, + "loss": 0.009, + "step": 323500 + }, + { + "epoch": 134.78, + "learning_rate": 6.527745424292846e-06, + "loss": 0.0093, + "step": 324000 + }, + { + "epoch": 134.98, + "learning_rate": 6.506946755407655e-06, + "loss": 0.0088, + "step": 324500 + }, + { + "epoch": 135.0, + "eval_cer": 0.245, + "eval_gen_len": 13.5749, + "eval_loss": 0.4489509165287018, + "eval_runtime": 256.739, + "eval_samples_per_second": 12.487, + "eval_steps_per_second": 3.124, + "step": 324540 + }, + { + "epoch": 135.19, + "learning_rate": 6.486148086522463e-06, + "loss": 0.0076, + "step": 325000 + }, + { + "epoch": 135.4, + "learning_rate": 6.465349417637273e-06, + "loss": 0.0078, + "step": 325500 + }, + { + "epoch": 135.61, + "learning_rate": 6.444550748752081e-06, + "loss": 0.007, + "step": 326000 + }, + { + "epoch": 135.82, + "learning_rate": 6.423793677204659e-06, + "loss": 0.0086, + "step": 326500 + }, + { + "epoch": 136.0, + "eval_cer": 0.2458, + "eval_gen_len": 13.8032, + "eval_loss": 0.45637834072113037, + "eval_runtime": 275.2079, + "eval_samples_per_second": 11.649, + "eval_steps_per_second": 2.914, + "step": 326944 + }, + { + "epoch": 136.02, + "learning_rate": 6.402995008319468e-06, + "loss": 0.0097, + "step": 327000 + }, + { + "epoch": 136.23, + "learning_rate": 6.382237936772047e-06, + "loss": 0.0084, + "step": 327500 + }, + { + "epoch": 136.44, + "learning_rate": 6.361439267886855e-06, + "loss": 0.0084, + "step": 328000 + }, + { + "epoch": 136.65, + "learning_rate": 6.340640599001664e-06, + "loss": 0.0083, + "step": 328500 + }, + { + "epoch": 136.86, + "learning_rate": 6.319841930116472e-06, + "loss": 0.0083, + "step": 329000 + }, + { + "epoch": 137.0, + "eval_cer": 0.2471, + "eval_gen_len": 13.6478, + "eval_loss": 0.4573554992675781, + "eval_runtime": 269.5066, + "eval_samples_per_second": 11.896, + "eval_steps_per_second": 2.976, + "step": 329348 + }, + { + "epoch": 137.06, + "learning_rate": 6.299043261231281e-06, + "loss": 0.0089, + "step": 329500 + }, + { + "epoch": 137.27, + "learning_rate": 6.27824459234609e-06, + "loss": 0.0087, + "step": 330000 + }, + { + "epoch": 137.48, + "learning_rate": 6.257445923460899e-06, + "loss": 0.0086, + "step": 330500 + }, + { + "epoch": 137.69, + "learning_rate": 6.236647254575707e-06, + "loss": 0.0085, + "step": 331000 + }, + { + "epoch": 137.9, + "learning_rate": 6.215890183028287e-06, + "loss": 0.0092, + "step": 331500 + }, + { + "epoch": 138.0, + "eval_cer": 0.2487, + "eval_gen_len": 13.8503, + "eval_loss": 0.46958354115486145, + "eval_runtime": 265.4531, + "eval_samples_per_second": 12.077, + "eval_steps_per_second": 3.021, + "step": 331752 + }, + { + "epoch": 138.1, + "learning_rate": 6.195091514143096e-06, + "loss": 0.0079, + "step": 332000 + }, + { + "epoch": 138.31, + "learning_rate": 6.174292845257904e-06, + "loss": 0.0073, + "step": 332500 + }, + { + "epoch": 138.52, + "learning_rate": 6.153494176372713e-06, + "loss": 0.0074, + "step": 333000 + }, + { + "epoch": 138.73, + "learning_rate": 6.132695507487521e-06, + "loss": 0.0089, + "step": 333500 + }, + { + "epoch": 138.94, + "learning_rate": 6.111896838602331e-06, + "loss": 0.0082, + "step": 334000 + }, + { + "epoch": 139.0, + "eval_cer": 0.2476, + "eval_gen_len": 13.7916, + "eval_loss": 0.4610365629196167, + "eval_runtime": 273.061, + "eval_samples_per_second": 11.741, + "eval_steps_per_second": 2.937, + "step": 334156 + }, + { + "epoch": 139.14, + "learning_rate": 6.091098169717139e-06, + "loss": 0.0076, + "step": 334500 + }, + { + "epoch": 139.35, + "learning_rate": 6.070299500831948e-06, + "loss": 0.0083, + "step": 335000 + }, + { + "epoch": 139.56, + "learning_rate": 6.049542429284526e-06, + "loss": 0.0088, + "step": 335500 + }, + { + "epoch": 139.77, + "learning_rate": 6.0287437603993344e-06, + "loss": 0.0078, + "step": 336000 + }, + { + "epoch": 139.98, + "learning_rate": 6.007945091514144e-06, + "loss": 0.0092, + "step": 336500 + }, + { + "epoch": 140.0, + "eval_cer": 0.2432, + "eval_gen_len": 13.6344, + "eval_loss": 0.43919724225997925, + "eval_runtime": 265.4301, + "eval_samples_per_second": 12.079, + "eval_steps_per_second": 3.022, + "step": 336560 + }, + { + "epoch": 140.18, + "learning_rate": 5.987146422628952e-06, + "loss": 0.007, + "step": 337000 + }, + { + "epoch": 140.39, + "learning_rate": 5.966347753743761e-06, + "loss": 0.0078, + "step": 337500 + }, + { + "epoch": 140.6, + "learning_rate": 5.945590682196339e-06, + "loss": 0.0087, + "step": 338000 + }, + { + "epoch": 140.81, + "learning_rate": 5.924792013311148e-06, + "loss": 0.0083, + "step": 338500 + }, + { + "epoch": 141.0, + "eval_cer": 0.2461, + "eval_gen_len": 13.733, + "eval_loss": 0.4848983883857727, + "eval_runtime": 269.6844, + "eval_samples_per_second": 11.888, + "eval_steps_per_second": 2.974, + "step": 338964 + }, + { + "epoch": 141.01, + "learning_rate": 5.904034941763727e-06, + "loss": 0.0083, + "step": 339000 + }, + { + "epoch": 141.22, + "learning_rate": 5.883236272878537e-06, + "loss": 0.0079, + "step": 339500 + }, + { + "epoch": 141.43, + "learning_rate": 5.862437603993345e-06, + "loss": 0.007, + "step": 340000 + }, + { + "epoch": 141.64, + "learning_rate": 5.841638935108154e-06, + "loss": 0.0085, + "step": 340500 + }, + { + "epoch": 141.85, + "learning_rate": 5.820840266222962e-06, + "loss": 0.0085, + "step": 341000 + }, + { + "epoch": 142.0, + "eval_cer": 0.2475, + "eval_gen_len": 13.8278, + "eval_loss": 0.46004167199134827, + "eval_runtime": 262.9827, + "eval_samples_per_second": 12.191, + "eval_steps_per_second": 3.05, + "step": 341368 + }, + { + "epoch": 142.05, + "learning_rate": 5.800041597337771e-06, + "loss": 0.0079, + "step": 341500 + }, + { + "epoch": 142.26, + "learning_rate": 5.779242928452579e-06, + "loss": 0.0078, + "step": 342000 + }, + { + "epoch": 142.47, + "learning_rate": 5.758444259567389e-06, + "loss": 0.0083, + "step": 342500 + }, + { + "epoch": 142.68, + "learning_rate": 5.737645590682197e-06, + "loss": 0.0072, + "step": 343000 + }, + { + "epoch": 142.89, + "learning_rate": 5.7168885191347755e-06, + "loss": 0.008, + "step": 343500 + }, + { + "epoch": 143.0, + "eval_cer": 0.2455, + "eval_gen_len": 13.7137, + "eval_loss": 0.4594569206237793, + "eval_runtime": 270.269, + "eval_samples_per_second": 11.862, + "eval_steps_per_second": 2.967, + "step": 343772 + }, + { + "epoch": 143.09, + "learning_rate": 5.696089850249584e-06, + "loss": 0.0085, + "step": 344000 + }, + { + "epoch": 143.3, + "learning_rate": 5.675291181364393e-06, + "loss": 0.0082, + "step": 344500 + }, + { + "epoch": 143.51, + "learning_rate": 5.654492512479202e-06, + "loss": 0.0078, + "step": 345000 + }, + { + "epoch": 143.72, + "learning_rate": 5.633735440931781e-06, + "loss": 0.0091, + "step": 345500 + }, + { + "epoch": 143.93, + "learning_rate": 5.61293677204659e-06, + "loss": 0.0084, + "step": 346000 + }, + { + "epoch": 144.0, + "eval_cer": 0.2419, + "eval_gen_len": 13.684, + "eval_loss": 0.44198158383369446, + "eval_runtime": 272.3122, + "eval_samples_per_second": 11.773, + "eval_steps_per_second": 2.945, + "step": 346176 + }, + { + "epoch": 144.13, + "learning_rate": 5.592138103161399e-06, + "loss": 0.0073, + "step": 346500 + }, + { + "epoch": 144.34, + "learning_rate": 5.571339434276207e-06, + "loss": 0.0076, + "step": 347000 + }, + { + "epoch": 144.55, + "learning_rate": 5.550540765391016e-06, + "loss": 0.0075, + "step": 347500 + }, + { + "epoch": 144.76, + "learning_rate": 5.529783693843595e-06, + "loss": 0.0092, + "step": 348000 + }, + { + "epoch": 144.97, + "learning_rate": 5.508985024958403e-06, + "loss": 0.0087, + "step": 348500 + }, + { + "epoch": 145.0, + "eval_cer": 0.2435, + "eval_gen_len": 13.5817, + "eval_loss": 0.4379993975162506, + "eval_runtime": 266.6103, + "eval_samples_per_second": 12.025, + "eval_steps_per_second": 3.008, + "step": 348580 + }, + { + "epoch": 145.17, + "learning_rate": 5.488186356073212e-06, + "loss": 0.007, + "step": 349000 + }, + { + "epoch": 145.38, + "learning_rate": 5.46738768718802e-06, + "loss": 0.0071, + "step": 349500 + }, + { + "epoch": 145.59, + "learning_rate": 5.4466306156405994e-06, + "loss": 0.0086, + "step": 350000 + }, + { + "epoch": 145.8, + "learning_rate": 5.425831946755408e-06, + "loss": 0.0074, + "step": 350500 + }, + { + "epoch": 146.0, + "eval_cer": 0.2438, + "eval_gen_len": 13.747, + "eval_loss": 0.4474620223045349, + "eval_runtime": 259.9915, + "eval_samples_per_second": 12.331, + "eval_steps_per_second": 3.085, + "step": 350984 + }, + { + "epoch": 146.01, + "learning_rate": 5.4050332778702165e-06, + "loss": 0.0085, + "step": 351000 + }, + { + "epoch": 146.21, + "learning_rate": 5.384234608985025e-06, + "loss": 0.0071, + "step": 351500 + }, + { + "epoch": 146.42, + "learning_rate": 5.3634359400998335e-06, + "loss": 0.0065, + "step": 352000 + }, + { + "epoch": 146.63, + "learning_rate": 5.342637271214642e-06, + "loss": 0.0076, + "step": 352500 + }, + { + "epoch": 146.84, + "learning_rate": 5.3218386023294514e-06, + "loss": 0.0076, + "step": 353000 + }, + { + "epoch": 147.0, + "eval_cer": 0.2433, + "eval_gen_len": 13.5989, + "eval_loss": 0.4507221579551697, + "eval_runtime": 259.806, + "eval_samples_per_second": 12.34, + "eval_steps_per_second": 3.087, + "step": 353388 + }, + { + "epoch": 147.05, + "learning_rate": 5.30103993344426e-06, + "loss": 0.0078, + "step": 353500 + }, + { + "epoch": 147.25, + "learning_rate": 5.2802412645590685e-06, + "loss": 0.0077, + "step": 354000 + }, + { + "epoch": 147.46, + "learning_rate": 5.259484193011648e-06, + "loss": 0.0075, + "step": 354500 + }, + { + "epoch": 147.67, + "learning_rate": 5.238685524126457e-06, + "loss": 0.0068, + "step": 355000 + }, + { + "epoch": 147.88, + "learning_rate": 5.217886855241265e-06, + "loss": 0.0091, + "step": 355500 + }, + { + "epoch": 148.0, + "eval_cer": 0.2461, + "eval_gen_len": 13.8125, + "eval_loss": 0.47150808572769165, + "eval_runtime": 262.9368, + "eval_samples_per_second": 12.193, + "eval_steps_per_second": 3.05, + "step": 355792 + }, + { + "epoch": 148.09, + "learning_rate": 5.1971297836938436e-06, + "loss": 0.0082, + "step": 356000 + }, + { + "epoch": 148.29, + "learning_rate": 5.176331114808653e-06, + "loss": 0.0066, + "step": 356500 + }, + { + "epoch": 148.5, + "learning_rate": 5.1555324459234614e-06, + "loss": 0.0081, + "step": 357000 + }, + { + "epoch": 148.71, + "learning_rate": 5.13473377703827e-06, + "loss": 0.0074, + "step": 357500 + }, + { + "epoch": 148.92, + "learning_rate": 5.1139351081530785e-06, + "loss": 0.0078, + "step": 358000 + }, + { + "epoch": 149.0, + "eval_cer": 0.2472, + "eval_gen_len": 13.8253, + "eval_loss": 0.46047914028167725, + "eval_runtime": 271.5636, + "eval_samples_per_second": 11.806, + "eval_steps_per_second": 2.953, + "step": 358196 + }, + { + "epoch": 149.13, + "learning_rate": 5.093136439267887e-06, + "loss": 0.0076, + "step": 358500 + }, + { + "epoch": 149.33, + "learning_rate": 5.0723377703826955e-06, + "loss": 0.0084, + "step": 359000 + }, + { + "epoch": 149.54, + "learning_rate": 5.051539101497505e-06, + "loss": 0.0081, + "step": 359500 + }, + { + "epoch": 149.75, + "learning_rate": 5.0307404326123134e-06, + "loss": 0.0075, + "step": 360000 + }, + { + "epoch": 149.96, + "learning_rate": 5.009941763727122e-06, + "loss": 0.0075, + "step": 360500 + }, + { + "epoch": 150.0, + "eval_cer": 0.2416, + "eval_gen_len": 13.4729, + "eval_loss": 0.4448852837085724, + "eval_runtime": 262.3745, + "eval_samples_per_second": 12.219, + "eval_steps_per_second": 3.057, + "step": 360600 + }, + { + "epoch": 150.17, + "learning_rate": 4.989184692179701e-06, + "loss": 0.0078, + "step": 361000 + }, + { + "epoch": 150.37, + "learning_rate": 4.9683860232945095e-06, + "loss": 0.0065, + "step": 361500 + }, + { + "epoch": 150.58, + "learning_rate": 4.947587354409318e-06, + "loss": 0.0077, + "step": 362000 + }, + { + "epoch": 150.79, + "learning_rate": 4.926788685524127e-06, + "loss": 0.008, + "step": 362500 + }, + { + "epoch": 151.0, + "learning_rate": 4.905990016638936e-06, + "loss": 0.0076, + "step": 363000 + }, + { + "epoch": 151.0, + "eval_cer": 0.2458, + "eval_gen_len": 13.7838, + "eval_loss": 0.47447100281715393, + "eval_runtime": 262.0741, + "eval_samples_per_second": 12.233, + "eval_steps_per_second": 3.06, + "step": 363004 + }, + { + "epoch": 151.21, + "learning_rate": 4.885232945091515e-06, + "loss": 0.0075, + "step": 363500 + }, + { + "epoch": 151.41, + "learning_rate": 4.8644342762063235e-06, + "loss": 0.0074, + "step": 364000 + }, + { + "epoch": 151.62, + "learning_rate": 4.843635607321132e-06, + "loss": 0.0077, + "step": 364500 + }, + { + "epoch": 151.83, + "learning_rate": 4.822878535773711e-06, + "loss": 0.0076, + "step": 365000 + }, + { + "epoch": 152.0, + "eval_cer": 0.2461, + "eval_gen_len": 13.7093, + "eval_loss": 0.4570690095424652, + "eval_runtime": 264.709, + "eval_samples_per_second": 12.111, + "eval_steps_per_second": 3.03, + "step": 365408 + }, + { + "epoch": 152.04, + "learning_rate": 4.8020798668885195e-06, + "loss": 0.0075, + "step": 365500 + }, + { + "epoch": 152.25, + "learning_rate": 4.781281198003328e-06, + "loss": 0.0066, + "step": 366000 + }, + { + "epoch": 152.45, + "learning_rate": 4.7604825291181366e-06, + "loss": 0.0079, + "step": 366500 + }, + { + "epoch": 152.66, + "learning_rate": 4.739683860232945e-06, + "loss": 0.0076, + "step": 367000 + }, + { + "epoch": 152.87, + "learning_rate": 4.718885191347754e-06, + "loss": 0.0088, + "step": 367500 + }, + { + "epoch": 153.0, + "eval_cer": 0.2414, + "eval_gen_len": 13.5468, + "eval_loss": 0.45958101749420166, + "eval_runtime": 263.6085, + "eval_samples_per_second": 12.162, + "eval_steps_per_second": 3.042, + "step": 367812 + }, + { + "epoch": 153.08, + "learning_rate": 4.6981281198003335e-06, + "loss": 0.0081, + "step": 368000 + }, + { + "epoch": 153.29, + "learning_rate": 4.677329450915142e-06, + "loss": 0.0072, + "step": 368500 + }, + { + "epoch": 153.49, + "learning_rate": 4.6565307820299505e-06, + "loss": 0.007, + "step": 369000 + }, + { + "epoch": 153.7, + "learning_rate": 4.635732113144759e-06, + "loss": 0.0068, + "step": 369500 + }, + { + "epoch": 153.91, + "learning_rate": 4.6149334442595676e-06, + "loss": 0.0082, + "step": 370000 + }, + { + "epoch": 154.0, + "eval_cer": 0.2475, + "eval_gen_len": 13.8384, + "eval_loss": 0.46518319845199585, + "eval_runtime": 270.3025, + "eval_samples_per_second": 11.861, + "eval_steps_per_second": 2.967, + "step": 370216 + }, + { + "epoch": 154.12, + "learning_rate": 4.594134775374376e-06, + "loss": 0.0067, + "step": 370500 + }, + { + "epoch": 154.33, + "learning_rate": 4.5733361064891855e-06, + "loss": 0.0073, + "step": 371000 + }, + { + "epoch": 154.53, + "learning_rate": 4.552537437603994e-06, + "loss": 0.0083, + "step": 371500 + }, + { + "epoch": 154.74, + "learning_rate": 4.5317387687188025e-06, + "loss": 0.0074, + "step": 372000 + }, + { + "epoch": 154.95, + "learning_rate": 4.510940099833611e-06, + "loss": 0.0077, + "step": 372500 + }, + { + "epoch": 155.0, + "eval_cer": 0.2426, + "eval_gen_len": 13.6457, + "eval_loss": 0.46483084559440613, + "eval_runtime": 258.2815, + "eval_samples_per_second": 12.413, + "eval_steps_per_second": 3.105, + "step": 372620 + }, + { + "epoch": 155.16, + "learning_rate": 4.49018302828619e-06, + "loss": 0.0072, + "step": 373000 + }, + { + "epoch": 155.37, + "learning_rate": 4.469384359400999e-06, + "loss": 0.0066, + "step": 373500 + }, + { + "epoch": 155.57, + "learning_rate": 4.448627287853578e-06, + "loss": 0.0074, + "step": 374000 + }, + { + "epoch": 155.78, + "learning_rate": 4.427828618968386e-06, + "loss": 0.0075, + "step": 374500 + }, + { + "epoch": 155.99, + "learning_rate": 4.407029950083195e-06, + "loss": 0.0074, + "step": 375000 + }, + { + "epoch": 156.0, + "eval_cer": 0.2422, + "eval_gen_len": 13.5889, + "eval_loss": 0.4521370232105255, + "eval_runtime": 266.1373, + "eval_samples_per_second": 12.046, + "eval_steps_per_second": 3.013, + "step": 375024 + }, + { + "epoch": 156.2, + "learning_rate": 4.386231281198003e-06, + "loss": 0.0072, + "step": 375500 + }, + { + "epoch": 156.41, + "learning_rate": 4.3654326123128125e-06, + "loss": 0.0073, + "step": 376000 + }, + { + "epoch": 156.61, + "learning_rate": 4.344633943427621e-06, + "loss": 0.0076, + "step": 376500 + }, + { + "epoch": 156.82, + "learning_rate": 4.32383527454243e-06, + "loss": 0.0073, + "step": 377000 + }, + { + "epoch": 157.0, + "eval_cer": 0.2465, + "eval_gen_len": 13.7867, + "eval_loss": 0.47169268131256104, + "eval_runtime": 264.268, + "eval_samples_per_second": 12.132, + "eval_steps_per_second": 3.035, + "step": 377428 + }, + { + "epoch": 157.03, + "learning_rate": 4.303036605657238e-06, + "loss": 0.0075, + "step": 377500 + }, + { + "epoch": 157.24, + "learning_rate": 4.282237936772047e-06, + "loss": 0.0063, + "step": 378000 + }, + { + "epoch": 157.45, + "learning_rate": 4.261480865224626e-06, + "loss": 0.0069, + "step": 378500 + }, + { + "epoch": 157.65, + "learning_rate": 4.240682196339434e-06, + "loss": 0.0078, + "step": 379000 + }, + { + "epoch": 157.86, + "learning_rate": 4.2198835274542435e-06, + "loss": 0.0078, + "step": 379500 + }, + { + "epoch": 158.0, + "eval_cer": 0.243, + "eval_gen_len": 13.7143, + "eval_loss": 0.46363481879234314, + "eval_runtime": 263.4944, + "eval_samples_per_second": 12.167, + "eval_steps_per_second": 3.044, + "step": 379832 + }, + { + "epoch": 158.07, + "learning_rate": 4.199084858569052e-06, + "loss": 0.0066, + "step": 380000 + }, + { + "epoch": 158.28, + "learning_rate": 4.178286189683861e-06, + "loss": 0.0073, + "step": 380500 + }, + { + "epoch": 158.49, + "learning_rate": 4.15752911813644e-06, + "loss": 0.0064, + "step": 381000 + }, + { + "epoch": 158.69, + "learning_rate": 4.136730449251248e-06, + "loss": 0.008, + "step": 381500 + }, + { + "epoch": 158.9, + "learning_rate": 4.115931780366057e-06, + "loss": 0.007, + "step": 382000 + }, + { + "epoch": 159.0, + "eval_cer": 0.2413, + "eval_gen_len": 13.6556, + "eval_loss": 0.4499606192111969, + "eval_runtime": 265.7001, + "eval_samples_per_second": 12.066, + "eval_steps_per_second": 3.018, + "step": 382236 + }, + { + "epoch": 159.11, + "learning_rate": 4.095133111480866e-06, + "loss": 0.0074, + "step": 382500 + }, + { + "epoch": 159.32, + "learning_rate": 4.0743344425956745e-06, + "loss": 0.0064, + "step": 383000 + }, + { + "epoch": 159.53, + "learning_rate": 4.053535773710483e-06, + "loss": 0.0068, + "step": 383500 + }, + { + "epoch": 159.73, + "learning_rate": 4.032778702163062e-06, + "loss": 0.0068, + "step": 384000 + }, + { + "epoch": 159.94, + "learning_rate": 4.011980033277871e-06, + "loss": 0.0079, + "step": 384500 + }, + { + "epoch": 160.0, + "eval_cer": 0.241, + "eval_gen_len": 13.622, + "eval_loss": 0.4524941146373749, + "eval_runtime": 263.8331, + "eval_samples_per_second": 12.152, + "eval_steps_per_second": 3.04, + "step": 384640 + }, + { + "epoch": 160.15, + "learning_rate": 3.991181364392679e-06, + "loss": 0.0076, + "step": 385000 + }, + { + "epoch": 160.36, + "learning_rate": 3.9703826955074885e-06, + "loss": 0.0062, + "step": 385500 + }, + { + "epoch": 160.57, + "learning_rate": 3.949584026622296e-06, + "loss": 0.0066, + "step": 386000 + }, + { + "epoch": 160.77, + "learning_rate": 3.928785357737105e-06, + "loss": 0.0067, + "step": 386500 + }, + { + "epoch": 160.98, + "learning_rate": 3.908069883527454e-06, + "loss": 0.0074, + "step": 387000 + }, + { + "epoch": 161.0, + "eval_cer": 0.2439, + "eval_gen_len": 13.7757, + "eval_loss": 0.4422759711742401, + "eval_runtime": 268.4253, + "eval_samples_per_second": 11.944, + "eval_steps_per_second": 2.988, + "step": 387044 + }, + { + "epoch": 161.19, + "learning_rate": 3.887271214642263e-06, + "loss": 0.008, + "step": 387500 + }, + { + "epoch": 161.4, + "learning_rate": 3.866472545757072e-06, + "loss": 0.0068, + "step": 388000 + }, + { + "epoch": 161.61, + "learning_rate": 3.845673876871881e-06, + "loss": 0.0066, + "step": 388500 + }, + { + "epoch": 161.81, + "learning_rate": 3.824875207986689e-06, + "loss": 0.0081, + "step": 389000 + }, + { + "epoch": 162.0, + "eval_cer": 0.2433, + "eval_gen_len": 13.7096, + "eval_loss": 0.4686408042907715, + "eval_runtime": 266.054, + "eval_samples_per_second": 12.05, + "eval_steps_per_second": 3.014, + "step": 389448 + }, + { + "epoch": 162.02, + "learning_rate": 3.804118136439268e-06, + "loss": 0.0073, + "step": 389500 + }, + { + "epoch": 162.23, + "learning_rate": 3.7833194675540767e-06, + "loss": 0.0057, + "step": 390000 + }, + { + "epoch": 162.44, + "learning_rate": 3.7625207986688856e-06, + "loss": 0.007, + "step": 390500 + }, + { + "epoch": 162.65, + "learning_rate": 3.741722129783694e-06, + "loss": 0.0074, + "step": 391000 + }, + { + "epoch": 162.85, + "learning_rate": 3.7209234608985027e-06, + "loss": 0.0067, + "step": 391500 + }, + { + "epoch": 163.0, + "eval_cer": 0.2422, + "eval_gen_len": 13.68, + "eval_loss": 0.4667229950428009, + "eval_runtime": 268.59, + "eval_samples_per_second": 11.936, + "eval_steps_per_second": 2.986, + "step": 391852 + }, + { + "epoch": 163.06, + "learning_rate": 3.7001247920133116e-06, + "loss": 0.0081, + "step": 392000 + }, + { + "epoch": 163.27, + "learning_rate": 3.67932612312812e-06, + "loss": 0.0067, + "step": 392500 + }, + { + "epoch": 163.48, + "learning_rate": 3.6585274542429287e-06, + "loss": 0.0064, + "step": 393000 + }, + { + "epoch": 163.69, + "learning_rate": 3.6377287853577376e-06, + "loss": 0.0073, + "step": 393500 + }, + { + "epoch": 163.89, + "learning_rate": 3.6169717138103167e-06, + "loss": 0.0074, + "step": 394000 + }, + { + "epoch": 164.0, + "eval_cer": 0.2418, + "eval_gen_len": 13.6482, + "eval_loss": 0.4521939754486084, + "eval_runtime": 273.0322, + "eval_samples_per_second": 11.742, + "eval_steps_per_second": 2.937, + "step": 394256 + }, + { + "epoch": 164.1, + "learning_rate": 3.596173044925125e-06, + "loss": 0.007, + "step": 394500 + }, + { + "epoch": 164.31, + "learning_rate": 3.5754159733777038e-06, + "loss": 0.0072, + "step": 395000 + }, + { + "epoch": 164.52, + "learning_rate": 3.5546173044925127e-06, + "loss": 0.0064, + "step": 395500 + }, + { + "epoch": 164.73, + "learning_rate": 3.5338186356073212e-06, + "loss": 0.0071, + "step": 396000 + }, + { + "epoch": 164.93, + "learning_rate": 3.5130199667221298e-06, + "loss": 0.0085, + "step": 396500 + }, + { + "epoch": 165.0, + "eval_cer": 0.2414, + "eval_gen_len": 13.6282, + "eval_loss": 0.45710650086402893, + "eval_runtime": 255.6061, + "eval_samples_per_second": 12.543, + "eval_steps_per_second": 3.138, + "step": 396660 + }, + { + "epoch": 165.14, + "learning_rate": 3.4922212978369387e-06, + "loss": 0.0063, + "step": 397000 + }, + { + "epoch": 165.35, + "learning_rate": 3.4714226289517472e-06, + "loss": 0.0068, + "step": 397500 + }, + { + "epoch": 165.56, + "learning_rate": 3.4506239600665558e-06, + "loss": 0.0072, + "step": 398000 + }, + { + "epoch": 165.77, + "learning_rate": 3.4298668885191348e-06, + "loss": 0.0065, + "step": 398500 + }, + { + "epoch": 165.97, + "learning_rate": 3.4090682196339437e-06, + "loss": 0.0074, + "step": 399000 + }, + { + "epoch": 166.0, + "eval_cer": 0.241, + "eval_gen_len": 13.6073, + "eval_loss": 0.45994168519973755, + "eval_runtime": 256.591, + "eval_samples_per_second": 12.495, + "eval_steps_per_second": 3.126, + "step": 399064 + }, + { + "epoch": 166.18, + "learning_rate": 3.3882695507487522e-06, + "loss": 0.0072, + "step": 399500 + }, + { + "epoch": 166.39, + "learning_rate": 3.3674708818635608e-06, + "loss": 0.0061, + "step": 400000 + }, + { + "epoch": 166.6, + "learning_rate": 3.3466722129783697e-06, + "loss": 0.0062, + "step": 400500 + }, + { + "epoch": 166.81, + "learning_rate": 3.3258735440931782e-06, + "loss": 0.0071, + "step": 401000 + }, + { + "epoch": 167.0, + "eval_cer": 0.2427, + "eval_gen_len": 13.5515, + "eval_loss": 0.4614485502243042, + "eval_runtime": 261.146, + "eval_samples_per_second": 12.277, + "eval_steps_per_second": 3.071, + "step": 401468 + }, + { + "epoch": 167.01, + "learning_rate": 3.305074875207987e-06, + "loss": 0.0076, + "step": 401500 + }, + { + "epoch": 167.22, + "learning_rate": 3.2842762063227957e-06, + "loss": 0.0068, + "step": 402000 + }, + { + "epoch": 167.43, + "learning_rate": 3.2634775374376042e-06, + "loss": 0.0075, + "step": 402500 + }, + { + "epoch": 167.64, + "learning_rate": 3.2427204658901832e-06, + "loss": 0.007, + "step": 403000 + }, + { + "epoch": 167.85, + "learning_rate": 3.221921797004992e-06, + "loss": 0.0054, + "step": 403500 + }, + { + "epoch": 168.0, + "eval_cer": 0.2471, + "eval_gen_len": 13.8865, + "eval_loss": 0.4741056263446808, + "eval_runtime": 274.0865, + "eval_samples_per_second": 11.697, + "eval_steps_per_second": 2.926, + "step": 403872 + }, + { + "epoch": 168.05, + "learning_rate": 3.2011231281198007e-06, + "loss": 0.0066, + "step": 404000 + }, + { + "epoch": 168.26, + "learning_rate": 3.1803244592346092e-06, + "loss": 0.0058, + "step": 404500 + }, + { + "epoch": 168.47, + "learning_rate": 3.1595673876871887e-06, + "loss": 0.0074, + "step": 405000 + }, + { + "epoch": 168.68, + "learning_rate": 3.138768718801997e-06, + "loss": 0.007, + "step": 405500 + }, + { + "epoch": 168.89, + "learning_rate": 3.1179700499168057e-06, + "loss": 0.0076, + "step": 406000 + }, + { + "epoch": 169.0, + "eval_cer": 0.2432, + "eval_gen_len": 13.6996, + "eval_loss": 0.46461164951324463, + "eval_runtime": 266.1722, + "eval_samples_per_second": 12.045, + "eval_steps_per_second": 3.013, + "step": 406276 + }, + { + "epoch": 169.09, + "learning_rate": 3.097171381031614e-06, + "loss": 0.0065, + "step": 406500 + }, + { + "epoch": 169.3, + "learning_rate": 3.0763727121464228e-06, + "loss": 0.0061, + "step": 407000 + }, + { + "epoch": 169.51, + "learning_rate": 3.0555740432612313e-06, + "loss": 0.0067, + "step": 407500 + }, + { + "epoch": 169.72, + "learning_rate": 3.03477537437604e-06, + "loss": 0.0075, + "step": 408000 + }, + { + "epoch": 169.93, + "learning_rate": 3.0140183028286193e-06, + "loss": 0.0064, + "step": 408500 + }, + { + "epoch": 170.0, + "eval_cer": 0.2407, + "eval_gen_len": 13.6525, + "eval_loss": 0.45978671312332153, + "eval_runtime": 264.0025, + "eval_samples_per_second": 12.144, + "eval_steps_per_second": 3.038, + "step": 408680 + }, + { + "epoch": 170.13, + "learning_rate": 2.993219633943428e-06, + "loss": 0.0058, + "step": 409000 + }, + { + "epoch": 170.34, + "learning_rate": 2.9724209650582363e-06, + "loss": 0.0072, + "step": 409500 + }, + { + "epoch": 170.55, + "learning_rate": 2.9516222961730453e-06, + "loss": 0.0073, + "step": 410000 + }, + { + "epoch": 170.76, + "learning_rate": 2.9308236272878538e-06, + "loss": 0.0067, + "step": 410500 + }, + { + "epoch": 170.97, + "learning_rate": 2.9100249584026623e-06, + "loss": 0.0066, + "step": 411000 + }, + { + "epoch": 171.0, + "eval_cer": 0.2463, + "eval_gen_len": 13.8612, + "eval_loss": 0.4791421890258789, + "eval_runtime": 266.2729, + "eval_samples_per_second": 12.04, + "eval_steps_per_second": 3.012, + "step": 411084 + }, + { + "epoch": 171.17, + "learning_rate": 2.8892262895174713e-06, + "loss": 0.0066, + "step": 411500 + }, + { + "epoch": 171.38, + "learning_rate": 2.8684276206322798e-06, + "loss": 0.0055, + "step": 412000 + }, + { + "epoch": 171.59, + "learning_rate": 2.847670549084859e-06, + "loss": 0.0072, + "step": 412500 + }, + { + "epoch": 171.8, + "learning_rate": 2.8268718801996673e-06, + "loss": 0.0067, + "step": 413000 + }, + { + "epoch": 172.0, + "eval_cer": 0.2408, + "eval_gen_len": 13.6622, + "eval_loss": 0.4588078260421753, + "eval_runtime": 266.7256, + "eval_samples_per_second": 12.02, + "eval_steps_per_second": 3.007, + "step": 413488 + }, + { + "epoch": 172.0, + "learning_rate": 2.8060732113144763e-06, + "loss": 0.0074, + "step": 413500 + }, + { + "epoch": 172.21, + "learning_rate": 2.7852745424292848e-06, + "loss": 0.0066, + "step": 414000 + }, + { + "epoch": 172.42, + "learning_rate": 2.7644758735440937e-06, + "loss": 0.0058, + "step": 414500 + }, + { + "epoch": 172.63, + "learning_rate": 2.7436772046589023e-06, + "loss": 0.0067, + "step": 415000 + }, + { + "epoch": 172.84, + "learning_rate": 2.7228785357737108e-06, + "loss": 0.0074, + "step": 415500 + }, + { + "epoch": 173.0, + "eval_cer": 0.2411, + "eval_gen_len": 13.7199, + "eval_loss": 0.45347917079925537, + "eval_runtime": 266.7945, + "eval_samples_per_second": 12.017, + "eval_steps_per_second": 3.006, + "step": 415892 + }, + { + "epoch": 173.04, + "learning_rate": 2.70212146422629e-06, + "loss": 0.0065, + "step": 416000 + }, + { + "epoch": 173.25, + "learning_rate": 2.6813227953410987e-06, + "loss": 0.0066, + "step": 416500 + }, + { + "epoch": 173.46, + "learning_rate": 2.6605241264559073e-06, + "loss": 0.006, + "step": 417000 + }, + { + "epoch": 173.67, + "learning_rate": 2.639725457570716e-06, + "loss": 0.0065, + "step": 417500 + }, + { + "epoch": 173.88, + "learning_rate": 2.618926788685524e-06, + "loss": 0.0073, + "step": 418000 + }, + { + "epoch": 174.0, + "eval_cer": 0.2413, + "eval_gen_len": 13.7389, + "eval_loss": 0.4470750093460083, + "eval_runtime": 261.7235, + "eval_samples_per_second": 12.25, + "eval_steps_per_second": 3.064, + "step": 418296 + }, + { + "epoch": 174.08, + "learning_rate": 2.598128119800333e-06, + "loss": 0.007, + "step": 418500 + }, + { + "epoch": 174.29, + "learning_rate": 2.5773294509151414e-06, + "loss": 0.0068, + "step": 419000 + }, + { + "epoch": 174.5, + "learning_rate": 2.55653078202995e-06, + "loss": 0.0073, + "step": 419500 + }, + { + "epoch": 174.71, + "learning_rate": 2.5358153078203e-06, + "loss": 0.0057, + "step": 420000 + }, + { + "epoch": 174.92, + "learning_rate": 2.5150166389351083e-06, + "loss": 0.0066, + "step": 420500 + }, + { + "epoch": 175.0, + "eval_cer": 0.2421, + "eval_gen_len": 13.7676, + "eval_loss": 0.46230319142341614, + "eval_runtime": 274.1203, + "eval_samples_per_second": 11.696, + "eval_steps_per_second": 2.926, + "step": 420700 + }, + { + "epoch": 175.12, + "learning_rate": 2.494217970049917e-06, + "loss": 0.0072, + "step": 421000 + }, + { + "epoch": 175.33, + "learning_rate": 2.473419301164726e-06, + "loss": 0.006, + "step": 421500 + }, + { + "epoch": 175.54, + "learning_rate": 2.4526206322795343e-06, + "loss": 0.0068, + "step": 422000 + }, + { + "epoch": 175.75, + "learning_rate": 2.4318635607321134e-06, + "loss": 0.0068, + "step": 422500 + }, + { + "epoch": 175.96, + "learning_rate": 2.411064891846922e-06, + "loss": 0.0067, + "step": 423000 + }, + { + "epoch": 176.0, + "eval_cer": 0.2427, + "eval_gen_len": 13.7302, + "eval_loss": 0.46741247177124023, + "eval_runtime": 271.0852, + "eval_samples_per_second": 11.827, + "eval_steps_per_second": 2.958, + "step": 423104 + }, + { + "epoch": 176.16, + "learning_rate": 2.390266222961731e-06, + "loss": 0.0065, + "step": 423500 + }, + { + "epoch": 176.37, + "learning_rate": 2.3694675540765393e-06, + "loss": 0.0062, + "step": 424000 + }, + { + "epoch": 176.58, + "learning_rate": 2.348668885191348e-06, + "loss": 0.0057, + "step": 424500 + }, + { + "epoch": 176.79, + "learning_rate": 2.327870216306157e-06, + "loss": 0.007, + "step": 425000 + }, + { + "epoch": 177.0, + "learning_rate": 2.307071547420965e-06, + "loss": 0.0077, + "step": 425500 + }, + { + "epoch": 177.0, + "eval_cer": 0.2399, + "eval_gen_len": 13.5359, + "eval_loss": 0.45084038376808167, + "eval_runtime": 262.6698, + "eval_samples_per_second": 12.205, + "eval_steps_per_second": 3.053, + "step": 425508 + }, + { + "epoch": 177.2, + "learning_rate": 2.286272878535774e-06, + "loss": 0.0059, + "step": 426000 + }, + { + "epoch": 177.41, + "learning_rate": 2.2654742096505824e-06, + "loss": 0.0067, + "step": 426500 + }, + { + "epoch": 177.62, + "learning_rate": 2.2447171381031614e-06, + "loss": 0.006, + "step": 427000 + }, + { + "epoch": 177.83, + "learning_rate": 2.2239184692179704e-06, + "loss": 0.0066, + "step": 427500 + }, + { + "epoch": 178.0, + "eval_cer": 0.2408, + "eval_gen_len": 13.7302, + "eval_loss": 0.4653932452201843, + "eval_runtime": 268.3858, + "eval_samples_per_second": 11.945, + "eval_steps_per_second": 2.988, + "step": 427912 + }, + { + "epoch": 178.04, + "learning_rate": 2.203119800332779e-06, + "loss": 0.0064, + "step": 428000 + }, + { + "epoch": 178.24, + "learning_rate": 2.1823211314475874e-06, + "loss": 0.0055, + "step": 428500 + }, + { + "epoch": 178.45, + "learning_rate": 2.161564059900167e-06, + "loss": 0.0068, + "step": 429000 + }, + { + "epoch": 178.66, + "learning_rate": 2.1407653910149754e-06, + "loss": 0.0068, + "step": 429500 + }, + { + "epoch": 178.87, + "learning_rate": 2.119966722129784e-06, + "loss": 0.0067, + "step": 430000 + }, + { + "epoch": 179.0, + "eval_cer": 0.2408, + "eval_gen_len": 13.6925, + "eval_loss": 0.462500661611557, + "eval_runtime": 268.4086, + "eval_samples_per_second": 11.944, + "eval_steps_per_second": 2.988, + "step": 430316 + }, + { + "epoch": 179.08, + "learning_rate": 2.0991680532445924e-06, + "loss": 0.0067, + "step": 430500 + }, + { + "epoch": 179.28, + "learning_rate": 2.078369384359401e-06, + "loss": 0.0059, + "step": 431000 + }, + { + "epoch": 179.49, + "learning_rate": 2.05761231281198e-06, + "loss": 0.0064, + "step": 431500 + }, + { + "epoch": 179.7, + "learning_rate": 2.036813643926789e-06, + "loss": 0.0065, + "step": 432000 + }, + { + "epoch": 179.91, + "learning_rate": 2.0160149750415974e-06, + "loss": 0.0072, + "step": 432500 + }, + { + "epoch": 180.0, + "eval_cer": 0.242, + "eval_gen_len": 13.7545, + "eval_loss": 0.46416959166526794, + "eval_runtime": 267.9605, + "eval_samples_per_second": 11.964, + "eval_steps_per_second": 2.993, + "step": 432720 + }, + { + "epoch": 180.12, + "learning_rate": 1.995216306156406e-06, + "loss": 0.0064, + "step": 433000 + }, + { + "epoch": 180.32, + "learning_rate": 1.9744592346089854e-06, + "loss": 0.0058, + "step": 433500 + }, + { + "epoch": 180.53, + "learning_rate": 1.953660565723794e-06, + "loss": 0.0063, + "step": 434000 + }, + { + "epoch": 180.74, + "learning_rate": 1.9328618968386024e-06, + "loss": 0.0065, + "step": 434500 + }, + { + "epoch": 180.95, + "learning_rate": 1.9120632279534114e-06, + "loss": 0.0066, + "step": 435000 + }, + { + "epoch": 181.0, + "eval_cer": 0.2401, + "eval_gen_len": 13.6538, + "eval_loss": 0.4692617356777191, + "eval_runtime": 260.4012, + "eval_samples_per_second": 12.312, + "eval_steps_per_second": 3.08, + "step": 435124 + }, + { + "epoch": 181.16, + "learning_rate": 1.8912645590682197e-06, + "loss": 0.0076, + "step": 435500 + }, + { + "epoch": 181.36, + "learning_rate": 1.8705074875207987e-06, + "loss": 0.0059, + "step": 436000 + }, + { + "epoch": 181.57, + "learning_rate": 1.8497088186356074e-06, + "loss": 0.0069, + "step": 436500 + }, + { + "epoch": 181.78, + "learning_rate": 1.8289101497504162e-06, + "loss": 0.0061, + "step": 437000 + }, + { + "epoch": 181.99, + "learning_rate": 1.8081114808652247e-06, + "loss": 0.0064, + "step": 437500 + }, + { + "epoch": 182.0, + "eval_cer": 0.2413, + "eval_gen_len": 13.7302, + "eval_loss": 0.4686383605003357, + "eval_runtime": 260.1458, + "eval_samples_per_second": 12.324, + "eval_steps_per_second": 3.083, + "step": 437528 + }, + { + "epoch": 182.2, + "learning_rate": 1.787354409317804e-06, + "loss": 0.0062, + "step": 438000 + }, + { + "epoch": 182.4, + "learning_rate": 1.7665557404326125e-06, + "loss": 0.0066, + "step": 438500 + }, + { + "epoch": 182.61, + "learning_rate": 1.7457570715474212e-06, + "loss": 0.0066, + "step": 439000 + }, + { + "epoch": 182.82, + "learning_rate": 1.725e-06, + "loss": 0.0067, + "step": 439500 + }, + { + "epoch": 183.0, + "eval_cer": 0.2415, + "eval_gen_len": 13.7371, + "eval_loss": 0.46899163722991943, + "eval_runtime": 270.7897, + "eval_samples_per_second": 11.839, + "eval_steps_per_second": 2.962, + "step": 439932 + }, + { + "epoch": 183.03, + "learning_rate": 1.7042013311148087e-06, + "loss": 0.0059, + "step": 440000 + }, + { + "epoch": 183.24, + "learning_rate": 1.6834026622296173e-06, + "loss": 0.0057, + "step": 440500 + }, + { + "epoch": 183.44, + "learning_rate": 1.662603993344426e-06, + "loss": 0.0062, + "step": 441000 + }, + { + "epoch": 183.65, + "learning_rate": 1.6418053244592347e-06, + "loss": 0.0061, + "step": 441500 + }, + { + "epoch": 183.86, + "learning_rate": 1.6210066555740433e-06, + "loss": 0.0067, + "step": 442000 + }, + { + "epoch": 184.0, + "eval_cer": 0.2392, + "eval_gen_len": 13.7442, + "eval_loss": 0.47148939967155457, + "eval_runtime": 271.9757, + "eval_samples_per_second": 11.788, + "eval_steps_per_second": 2.949, + "step": 442336 + }, + { + "epoch": 184.07, + "learning_rate": 1.600207986688852e-06, + "loss": 0.0066, + "step": 442500 + }, + { + "epoch": 184.28, + "learning_rate": 1.5794093178036607e-06, + "loss": 0.0063, + "step": 443000 + }, + { + "epoch": 184.48, + "learning_rate": 1.5586106489184695e-06, + "loss": 0.0063, + "step": 443500 + }, + { + "epoch": 184.69, + "learning_rate": 1.5378535773710485e-06, + "loss": 0.0063, + "step": 444000 + }, + { + "epoch": 184.9, + "learning_rate": 1.5170965058236273e-06, + "loss": 0.0062, + "step": 444500 + }, + { + "epoch": 185.0, + "eval_cer": 0.2395, + "eval_gen_len": 13.6572, + "eval_loss": 0.46137315034866333, + "eval_runtime": 269.0871, + "eval_samples_per_second": 11.914, + "eval_steps_per_second": 2.98, + "step": 444740 + }, + { + "epoch": 185.11, + "learning_rate": 1.496297836938436e-06, + "loss": 0.0066, + "step": 445000 + }, + { + "epoch": 185.32, + "learning_rate": 1.4754991680532445e-06, + "loss": 0.006, + "step": 445500 + }, + { + "epoch": 185.52, + "learning_rate": 1.4547004991680533e-06, + "loss": 0.0053, + "step": 446000 + }, + { + "epoch": 185.73, + "learning_rate": 1.433901830282862e-06, + "loss": 0.0075, + "step": 446500 + }, + { + "epoch": 185.94, + "learning_rate": 1.4131031613976705e-06, + "loss": 0.0068, + "step": 447000 + }, + { + "epoch": 186.0, + "eval_cer": 0.2396, + "eval_gen_len": 13.6185, + "eval_loss": 0.46076661348342896, + "eval_runtime": 256.715, + "eval_samples_per_second": 12.489, + "eval_steps_per_second": 3.124, + "step": 447144 + }, + { + "epoch": 186.15, + "learning_rate": 1.3923044925124793e-06, + "loss": 0.0062, + "step": 447500 + }, + { + "epoch": 186.36, + "learning_rate": 1.371505823627288e-06, + "loss": 0.0058, + "step": 448000 + }, + { + "epoch": 186.56, + "learning_rate": 1.3507071547420965e-06, + "loss": 0.0065, + "step": 448500 + }, + { + "epoch": 186.77, + "learning_rate": 1.3299500831946758e-06, + "loss": 0.0065, + "step": 449000 + }, + { + "epoch": 186.98, + "learning_rate": 1.3091514143094845e-06, + "loss": 0.0064, + "step": 449500 + }, + { + "epoch": 187.0, + "eval_cer": 0.2391, + "eval_gen_len": 13.7558, + "eval_loss": 0.47285133600234985, + "eval_runtime": 259.8275, + "eval_samples_per_second": 12.339, + "eval_steps_per_second": 3.087, + "step": 449548 + }, + { + "epoch": 187.19, + "learning_rate": 1.288352745424293e-06, + "loss": 0.0054, + "step": 450000 + }, + { + "epoch": 187.4, + "learning_rate": 1.2675540765391017e-06, + "loss": 0.0064, + "step": 450500 + }, + { + "epoch": 187.6, + "learning_rate": 1.2467554076539103e-06, + "loss": 0.0059, + "step": 451000 + }, + { + "epoch": 187.81, + "learning_rate": 1.225956738768719e-06, + "loss": 0.0059, + "step": 451500 + }, + { + "epoch": 188.0, + "eval_cer": 0.2397, + "eval_gen_len": 13.6946, + "eval_loss": 0.47259289026260376, + "eval_runtime": 273.3002, + "eval_samples_per_second": 11.731, + "eval_steps_per_second": 2.935, + "step": 451952 + }, + { + "epoch": 188.02, + "learning_rate": 1.2051996672212978e-06, + "loss": 0.0073, + "step": 452000 + }, + { + "epoch": 188.23, + "learning_rate": 1.1844009983361065e-06, + "loss": 0.0065, + "step": 452500 + }, + { + "epoch": 188.44, + "learning_rate": 1.1636439267886856e-06, + "loss": 0.0069, + "step": 453000 + }, + { + "epoch": 188.64, + "learning_rate": 1.1428452579034943e-06, + "loss": 0.0062, + "step": 453500 + }, + { + "epoch": 188.85, + "learning_rate": 1.1220881863560733e-06, + "loss": 0.0052, + "step": 454000 + }, + { + "epoch": 189.0, + "eval_cer": 0.239, + "eval_gen_len": 13.6185, + "eval_loss": 0.4666392505168915, + "eval_runtime": 268.978, + "eval_samples_per_second": 11.919, + "eval_steps_per_second": 2.982, + "step": 454356 + }, + { + "epoch": 189.06, + "learning_rate": 1.1012895174708818e-06, + "loss": 0.0061, + "step": 454500 + }, + { + "epoch": 189.27, + "learning_rate": 1.0804908485856906e-06, + "loss": 0.0065, + "step": 455000 + }, + { + "epoch": 189.48, + "learning_rate": 1.0596921797004993e-06, + "loss": 0.0051, + "step": 455500 + }, + { + "epoch": 189.68, + "learning_rate": 1.038893510815308e-06, + "loss": 0.0066, + "step": 456000 + }, + { + "epoch": 189.89, + "learning_rate": 1.0180948419301166e-06, + "loss": 0.0066, + "step": 456500 + }, + { + "epoch": 190.0, + "eval_cer": 0.2381, + "eval_gen_len": 13.6825, + "eval_loss": 0.4684942066669464, + "eval_runtime": 269.3222, + "eval_samples_per_second": 11.904, + "eval_steps_per_second": 2.978, + "step": 456760 + }, + { + "epoch": 190.1, + "learning_rate": 9.97296173044925e-07, + "loss": 0.0061, + "step": 457000 + }, + { + "epoch": 190.31, + "learning_rate": 9.765391014975043e-07, + "loss": 0.0058, + "step": 457500 + }, + { + "epoch": 190.52, + "learning_rate": 9.557404326123129e-07, + "loss": 0.0058, + "step": 458000 + }, + { + "epoch": 190.72, + "learning_rate": 9.349417637271216e-07, + "loss": 0.0065, + "step": 458500 + }, + { + "epoch": 190.93, + "learning_rate": 9.141430948419302e-07, + "loss": 0.0061, + "step": 459000 + }, + { + "epoch": 191.0, + "eval_cer": 0.238, + "eval_gen_len": 13.6538, + "eval_loss": 0.46481847763061523, + "eval_runtime": 260.0695, + "eval_samples_per_second": 12.327, + "eval_steps_per_second": 3.084, + "step": 459164 + }, + { + "epoch": 191.14, + "learning_rate": 8.933444259567387e-07, + "loss": 0.006, + "step": 459500 + }, + { + "epoch": 191.35, + "learning_rate": 8.725457570715475e-07, + "loss": 0.0055, + "step": 460000 + }, + { + "epoch": 191.56, + "learning_rate": 8.517470881863561e-07, + "loss": 0.0058, + "step": 460500 + }, + { + "epoch": 191.76, + "learning_rate": 8.309900166389352e-07, + "loss": 0.0066, + "step": 461000 + }, + { + "epoch": 191.97, + "learning_rate": 8.101913477537439e-07, + "loss": 0.0063, + "step": 461500 + }, + { + "epoch": 192.0, + "eval_cer": 0.2386, + "eval_gen_len": 13.6301, + "eval_loss": 0.46835771203041077, + "eval_runtime": 258.927, + "eval_samples_per_second": 12.382, + "eval_steps_per_second": 3.097, + "step": 461568 + }, + { + "epoch": 192.18, + "learning_rate": 7.893926788685524e-07, + "loss": 0.0063, + "step": 462000 + }, + { + "epoch": 192.39, + "learning_rate": 7.685940099833611e-07, + "loss": 0.0059, + "step": 462500 + }, + { + "epoch": 192.6, + "learning_rate": 7.477953410981697e-07, + "loss": 0.0058, + "step": 463000 + }, + { + "epoch": 192.8, + "learning_rate": 7.269966722129785e-07, + "loss": 0.0064, + "step": 463500 + }, + { + "epoch": 193.0, + "eval_cer": 0.2377, + "eval_gen_len": 13.6687, + "eval_loss": 0.4715929627418518, + "eval_runtime": 259.4667, + "eval_samples_per_second": 12.356, + "eval_steps_per_second": 3.091, + "step": 463972 + }, + { + "epoch": 193.01, + "learning_rate": 7.061980033277871e-07, + "loss": 0.0051, + "step": 464000 + }, + { + "epoch": 193.22, + "learning_rate": 6.853993344425957e-07, + "loss": 0.0054, + "step": 464500 + } + ], + "max_steps": 480800, + "num_train_epochs": 200, + "total_flos": 2079248374038528.0, + "trial_name": null, + "trial_params": null +}