{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5127448755767339, "eval_steps": 200, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001512744875576734, "grad_norm": 0.8282566070556641, "learning_rate": 0.00019996, "loss": 3.4576, "step": 10 }, { "epoch": 0.003025489751153468, "grad_norm": 0.1628154069185257, "learning_rate": 0.00019992000000000002, "loss": 0.0992, "step": 20 }, { "epoch": 0.004538234626730202, "grad_norm": 0.17421123385429382, "learning_rate": 0.00019988, "loss": 0.0666, "step": 30 }, { "epoch": 0.006050979502306936, "grad_norm": 0.08850277960300446, "learning_rate": 0.00019984, "loss": 0.0661, "step": 40 }, { "epoch": 0.00756372437788367, "grad_norm": 0.11368270963430405, "learning_rate": 0.0001998, "loss": 0.0639, "step": 50 }, { "epoch": 0.009076469253460404, "grad_norm": 0.12990300357341766, "learning_rate": 0.00019976000000000003, "loss": 0.0617, "step": 60 }, { "epoch": 0.010589214129037138, "grad_norm": 0.08885369449853897, "learning_rate": 0.00019972000000000002, "loss": 0.0643, "step": 70 }, { "epoch": 0.012101959004613872, "grad_norm": 0.07073435187339783, "learning_rate": 0.00019968, "loss": 0.0629, "step": 80 }, { "epoch": 0.013614703880190605, "grad_norm": 0.061856113374233246, "learning_rate": 0.00019964, "loss": 0.061, "step": 90 }, { "epoch": 0.01512744875576734, "grad_norm": 0.06827201694250107, "learning_rate": 0.0001996, "loss": 0.0586, "step": 100 }, { "epoch": 0.016640193631344075, "grad_norm": 0.07220456004142761, "learning_rate": 0.00019956000000000002, "loss": 0.055, "step": 110 }, { "epoch": 0.018152938506920808, "grad_norm": 0.06632555276155472, "learning_rate": 0.00019952000000000001, "loss": 0.0586, "step": 120 }, { "epoch": 0.01966568338249754, "grad_norm": 0.09966724365949631, "learning_rate": 0.00019948, "loss": 0.0621, "step": 130 }, { "epoch": 0.021178428258074276, "grad_norm": 0.0833888053894043, "learning_rate": 0.00019944, "loss": 0.0591, "step": 140 }, { "epoch": 0.02269117313365101, "grad_norm": 0.08170727640390396, "learning_rate": 0.00019940000000000002, "loss": 0.055, "step": 150 }, { "epoch": 0.024203918009227745, "grad_norm": 0.07089231163263321, "learning_rate": 0.00019936000000000002, "loss": 0.0582, "step": 160 }, { "epoch": 0.025716662884804477, "grad_norm": 0.09390200674533844, "learning_rate": 0.00019932, "loss": 0.0628, "step": 170 }, { "epoch": 0.02722940776038121, "grad_norm": 0.06722863018512726, "learning_rate": 0.00019928, "loss": 0.0591, "step": 180 }, { "epoch": 0.028742152635957946, "grad_norm": 0.0743609368801117, "learning_rate": 0.00019924, "loss": 0.0626, "step": 190 }, { "epoch": 0.03025489751153468, "grad_norm": 0.08125407248735428, "learning_rate": 0.00019920000000000002, "loss": 0.0601, "step": 200 }, { "epoch": 0.03025489751153468, "eval_cer": 0.5356160728183765, "eval_loss": 0.05078176036477089, "eval_runtime": 10281.8657, "eval_samples_per_second": 2.047, "eval_steps_per_second": 0.256, "step": 200 }, { "epoch": 0.03176764238711141, "grad_norm": 0.07030890136957169, "learning_rate": 0.00019916, "loss": 0.0597, "step": 210 }, { "epoch": 0.03328038726268815, "grad_norm": 0.05290469154715538, "learning_rate": 0.00019912, "loss": 0.0587, "step": 220 }, { "epoch": 0.03479313213826488, "grad_norm": 0.07339277863502502, "learning_rate": 0.00019908, "loss": 0.0529, "step": 230 }, { "epoch": 0.036305877013841616, "grad_norm": 0.0727711170911789, "learning_rate": 0.00019904, "loss": 0.0539, "step": 240 }, { "epoch": 0.03781862188941835, "grad_norm": 0.07383541762828827, "learning_rate": 0.000199, "loss": 0.0532, "step": 250 }, { "epoch": 0.03933136676499508, "grad_norm": 0.07042526453733444, "learning_rate": 0.00019896, "loss": 0.0571, "step": 260 }, { "epoch": 0.04084411164057182, "grad_norm": 0.08188482373952866, "learning_rate": 0.00019892000000000003, "loss": 0.0521, "step": 270 }, { "epoch": 0.04235685651614855, "grad_norm": 0.07334589958190918, "learning_rate": 0.00019888, "loss": 0.0532, "step": 280 }, { "epoch": 0.043869601391725285, "grad_norm": 0.06326377391815186, "learning_rate": 0.00019884000000000001, "loss": 0.0528, "step": 290 }, { "epoch": 0.04538234626730202, "grad_norm": 0.05303795263171196, "learning_rate": 0.0001988, "loss": 0.0539, "step": 300 }, { "epoch": 0.04689509114287875, "grad_norm": 0.058723289519548416, "learning_rate": 0.00019876, "loss": 0.0469, "step": 310 }, { "epoch": 0.04840783601845549, "grad_norm": 0.08683237433433533, "learning_rate": 0.00019872000000000002, "loss": 0.0601, "step": 320 }, { "epoch": 0.04992058089403222, "grad_norm": 0.07650341093540192, "learning_rate": 0.00019868, "loss": 0.0582, "step": 330 }, { "epoch": 0.051433325769608955, "grad_norm": 0.054965659976005554, "learning_rate": 0.00019864, "loss": 0.0548, "step": 340 }, { "epoch": 0.05294607064518569, "grad_norm": 0.06949716061353683, "learning_rate": 0.0001986, "loss": 0.0581, "step": 350 }, { "epoch": 0.05445881552076242, "grad_norm": 0.10514732450246811, "learning_rate": 0.00019856000000000002, "loss": 0.0587, "step": 360 }, { "epoch": 0.05597156039633916, "grad_norm": 0.06586117297410965, "learning_rate": 0.00019852000000000002, "loss": 0.0561, "step": 370 }, { "epoch": 0.05748430527191589, "grad_norm": 0.09821395576000214, "learning_rate": 0.00019848, "loss": 0.0556, "step": 380 }, { "epoch": 0.058997050147492625, "grad_norm": 0.06488014757633209, "learning_rate": 0.00019844, "loss": 0.0634, "step": 390 }, { "epoch": 0.06050979502306936, "grad_norm": 0.06910958141088486, "learning_rate": 0.0001984, "loss": 0.052, "step": 400 }, { "epoch": 0.06050979502306936, "eval_cer": 0.2714758865721352, "eval_loss": 0.04847713187336922, "eval_runtime": 10484.76, "eval_samples_per_second": 2.008, "eval_steps_per_second": 0.251, "step": 400 }, { "epoch": 0.0620225398986461, "grad_norm": 0.048563435673713684, "learning_rate": 0.00019836000000000002, "loss": 0.0565, "step": 410 }, { "epoch": 0.06353528477422282, "grad_norm": 0.055841896682977676, "learning_rate": 0.00019832, "loss": 0.0547, "step": 420 }, { "epoch": 0.06504802964979955, "grad_norm": 0.05644605681300163, "learning_rate": 0.00019828, "loss": 0.0575, "step": 430 }, { "epoch": 0.0665607745253763, "grad_norm": 0.05617703124880791, "learning_rate": 0.00019824, "loss": 0.0514, "step": 440 }, { "epoch": 0.06807351940095303, "grad_norm": 0.11480820178985596, "learning_rate": 0.00019820000000000002, "loss": 0.0562, "step": 450 }, { "epoch": 0.06958626427652977, "grad_norm": 0.06004955247044563, "learning_rate": 0.00019816000000000001, "loss": 0.0575, "step": 460 }, { "epoch": 0.0710990091521065, "grad_norm": 0.07830873131752014, "learning_rate": 0.00019812, "loss": 0.0621, "step": 470 }, { "epoch": 0.07261175402768323, "grad_norm": 0.052650969475507736, "learning_rate": 0.00019808, "loss": 0.0599, "step": 480 }, { "epoch": 0.07412449890325996, "grad_norm": 0.09298545122146606, "learning_rate": 0.00019804, "loss": 0.0559, "step": 490 }, { "epoch": 0.0756372437788367, "grad_norm": 0.06198689714074135, "learning_rate": 0.00019800000000000002, "loss": 0.047, "step": 500 }, { "epoch": 0.07714998865441343, "grad_norm": 0.06688915193080902, "learning_rate": 0.00019796, "loss": 0.0523, "step": 510 }, { "epoch": 0.07866273352999016, "grad_norm": 0.06676903367042542, "learning_rate": 0.00019792000000000003, "loss": 0.0509, "step": 520 }, { "epoch": 0.08017547840556691, "grad_norm": 0.06219707056879997, "learning_rate": 0.00019788, "loss": 0.0553, "step": 530 }, { "epoch": 0.08168822328114364, "grad_norm": 0.07905440032482147, "learning_rate": 0.00019784, "loss": 0.0506, "step": 540 }, { "epoch": 0.08320096815672037, "grad_norm": 0.08591905236244202, "learning_rate": 0.0001978, "loss": 0.0603, "step": 550 }, { "epoch": 0.0847137130322971, "grad_norm": 0.05921874940395355, "learning_rate": 0.00019776, "loss": 0.0562, "step": 560 }, { "epoch": 0.08622645790787384, "grad_norm": 0.058868613094091415, "learning_rate": 0.00019772000000000002, "loss": 0.0517, "step": 570 }, { "epoch": 0.08773920278345057, "grad_norm": 0.06818246096372604, "learning_rate": 0.00019768, "loss": 0.0478, "step": 580 }, { "epoch": 0.0892519476590273, "grad_norm": 0.07364825904369354, "learning_rate": 0.00019764, "loss": 0.0553, "step": 590 }, { "epoch": 0.09076469253460404, "grad_norm": 0.07647281885147095, "learning_rate": 0.0001976, "loss": 0.0527, "step": 600 }, { "epoch": 0.09076469253460404, "eval_cer": 0.282631389088609, "eval_loss": 0.047340717166662216, "eval_runtime": 10466.4392, "eval_samples_per_second": 2.011, "eval_steps_per_second": 0.251, "step": 600 }, { "epoch": 0.09227743741018077, "grad_norm": 0.0819125548005104, "learning_rate": 0.00019756, "loss": 0.0509, "step": 610 }, { "epoch": 0.0937901822857575, "grad_norm": 0.06566735357046127, "learning_rate": 0.00019752000000000002, "loss": 0.0583, "step": 620 }, { "epoch": 0.09530292716133425, "grad_norm": 0.06856215745210648, "learning_rate": 0.00019748, "loss": 0.0465, "step": 630 }, { "epoch": 0.09681567203691098, "grad_norm": 0.06130633130669594, "learning_rate": 0.00019744, "loss": 0.0509, "step": 640 }, { "epoch": 0.09832841691248771, "grad_norm": 0.08208902925252914, "learning_rate": 0.0001974, "loss": 0.0549, "step": 650 }, { "epoch": 0.09984116178806444, "grad_norm": 0.08106379210948944, "learning_rate": 0.00019736000000000002, "loss": 0.0584, "step": 660 }, { "epoch": 0.10135390666364118, "grad_norm": 0.08364614844322205, "learning_rate": 0.00019732000000000001, "loss": 0.0543, "step": 670 }, { "epoch": 0.10286665153921791, "grad_norm": 0.06432674080133438, "learning_rate": 0.00019728, "loss": 0.0535, "step": 680 }, { "epoch": 0.10437939641479464, "grad_norm": 0.07217614352703094, "learning_rate": 0.00019724, "loss": 0.0521, "step": 690 }, { "epoch": 0.10589214129037137, "grad_norm": 0.06074230372905731, "learning_rate": 0.0001972, "loss": 0.0545, "step": 700 }, { "epoch": 0.10740488616594811, "grad_norm": 0.04888018220663071, "learning_rate": 0.00019716000000000002, "loss": 0.0445, "step": 710 }, { "epoch": 0.10891763104152484, "grad_norm": 0.07705683261156082, "learning_rate": 0.00019712, "loss": 0.0491, "step": 720 }, { "epoch": 0.11043037591710159, "grad_norm": 0.06741231679916382, "learning_rate": 0.00019708000000000003, "loss": 0.053, "step": 730 }, { "epoch": 0.11194312079267832, "grad_norm": 0.0673738569021225, "learning_rate": 0.00019704, "loss": 0.0473, "step": 740 }, { "epoch": 0.11345586566825505, "grad_norm": 0.06236235797405243, "learning_rate": 0.00019700000000000002, "loss": 0.0538, "step": 750 }, { "epoch": 0.11496861054383178, "grad_norm": 0.0538531057536602, "learning_rate": 0.00019696, "loss": 0.0414, "step": 760 }, { "epoch": 0.11648135541940852, "grad_norm": 0.09818791598081589, "learning_rate": 0.00019692, "loss": 0.0551, "step": 770 }, { "epoch": 0.11799410029498525, "grad_norm": 0.06459952145814896, "learning_rate": 0.00019688000000000003, "loss": 0.0543, "step": 780 }, { "epoch": 0.11950684517056198, "grad_norm": 0.09495878219604492, "learning_rate": 0.00019684, "loss": 0.0566, "step": 790 }, { "epoch": 0.12101959004613871, "grad_norm": 0.06249309703707695, "learning_rate": 0.0001968, "loss": 0.0492, "step": 800 }, { "epoch": 0.12101959004613871, "eval_cer": 0.0030890735373690806, "eval_loss": 0.046879783272743225, "eval_runtime": 10443.0859, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.252, "step": 800 }, { "epoch": 0.12253233492171545, "grad_norm": 0.06483816355466843, "learning_rate": 0.00019676, "loss": 0.048, "step": 810 }, { "epoch": 0.1240450797972922, "grad_norm": 0.05618014931678772, "learning_rate": 0.00019672000000000003, "loss": 0.0484, "step": 820 }, { "epoch": 0.1255578246728689, "grad_norm": 0.07441507279872894, "learning_rate": 0.00019668000000000002, "loss": 0.0548, "step": 830 }, { "epoch": 0.12707056954844564, "grad_norm": 0.05274181067943573, "learning_rate": 0.00019664000000000001, "loss": 0.0619, "step": 840 }, { "epoch": 0.12858331442402238, "grad_norm": 0.06264190375804901, "learning_rate": 0.0001966, "loss": 0.0525, "step": 850 }, { "epoch": 0.1300960592995991, "grad_norm": 0.07662319391965866, "learning_rate": 0.00019656, "loss": 0.0532, "step": 860 }, { "epoch": 0.13160880417517587, "grad_norm": 0.06203316152095795, "learning_rate": 0.00019652000000000002, "loss": 0.0525, "step": 870 }, { "epoch": 0.1331215490507526, "grad_norm": 0.1326906681060791, "learning_rate": 0.00019648000000000002, "loss": 0.0539, "step": 880 }, { "epoch": 0.13463429392632933, "grad_norm": 0.10350421816110611, "learning_rate": 0.00019644, "loss": 0.0556, "step": 890 }, { "epoch": 0.13614703880190607, "grad_norm": 0.049543242901563644, "learning_rate": 0.0001964, "loss": 0.0482, "step": 900 }, { "epoch": 0.1376597836774828, "grad_norm": 0.11776097118854523, "learning_rate": 0.00019636000000000002, "loss": 0.0538, "step": 910 }, { "epoch": 0.13917252855305953, "grad_norm": 0.05535553768277168, "learning_rate": 0.00019632000000000002, "loss": 0.052, "step": 920 }, { "epoch": 0.14068527342863626, "grad_norm": 0.05945896357297897, "learning_rate": 0.00019628, "loss": 0.0491, "step": 930 }, { "epoch": 0.142198018304213, "grad_norm": 0.1228972002863884, "learning_rate": 0.00019624, "loss": 0.0511, "step": 940 }, { "epoch": 0.14371076317978973, "grad_norm": 0.08868791162967682, "learning_rate": 0.0001962, "loss": 0.057, "step": 950 }, { "epoch": 0.14522350805536646, "grad_norm": 0.07960449159145355, "learning_rate": 0.00019616000000000002, "loss": 0.0514, "step": 960 }, { "epoch": 0.1467362529309432, "grad_norm": 0.06392108649015427, "learning_rate": 0.00019612, "loss": 0.0558, "step": 970 }, { "epoch": 0.14824899780651993, "grad_norm": 0.07048727571964264, "learning_rate": 0.00019608, "loss": 0.053, "step": 980 }, { "epoch": 0.14976174268209666, "grad_norm": 0.10491488873958588, "learning_rate": 0.00019604, "loss": 0.0489, "step": 990 }, { "epoch": 0.1512744875576734, "grad_norm": 0.059835776686668396, "learning_rate": 0.000196, "loss": 0.0474, "step": 1000 }, { "epoch": 0.1512744875576734, "eval_cer": 0.4367181574025345, "eval_loss": 0.04569260776042938, "eval_runtime": 10457.5718, "eval_samples_per_second": 2.013, "eval_steps_per_second": 0.252, "step": 1000 }, { "epoch": 0.15278723243325013, "grad_norm": 0.07570289075374603, "learning_rate": 0.00019596000000000001, "loss": 0.0522, "step": 1010 }, { "epoch": 0.15429997730882686, "grad_norm": 0.09082864969968796, "learning_rate": 0.00019592, "loss": 0.0516, "step": 1020 }, { "epoch": 0.1558127221844036, "grad_norm": 0.06894449889659882, "learning_rate": 0.00019588000000000003, "loss": 0.0489, "step": 1030 }, { "epoch": 0.15732546705998032, "grad_norm": 0.05989064276218414, "learning_rate": 0.00019584, "loss": 0.0514, "step": 1040 }, { "epoch": 0.15883821193555706, "grad_norm": 0.060047443956136703, "learning_rate": 0.00019580000000000002, "loss": 0.047, "step": 1050 }, { "epoch": 0.16035095681113382, "grad_norm": 0.06459174305200577, "learning_rate": 0.00019576, "loss": 0.0532, "step": 1060 }, { "epoch": 0.16186370168671055, "grad_norm": 0.061583805829286575, "learning_rate": 0.00019572, "loss": 0.0485, "step": 1070 }, { "epoch": 0.16337644656228728, "grad_norm": 0.060534268617630005, "learning_rate": 0.00019568000000000002, "loss": 0.0468, "step": 1080 }, { "epoch": 0.164889191437864, "grad_norm": 0.06731607764959335, "learning_rate": 0.00019564, "loss": 0.0481, "step": 1090 }, { "epoch": 0.16640193631344075, "grad_norm": 0.0757998675107956, "learning_rate": 0.0001956, "loss": 0.056, "step": 1100 }, { "epoch": 0.16791468118901748, "grad_norm": 0.08009450882673264, "learning_rate": 0.00019556, "loss": 0.0523, "step": 1110 }, { "epoch": 0.1694274260645942, "grad_norm": 2.663090944290161, "learning_rate": 0.00019552000000000003, "loss": 0.1404, "step": 1120 }, { "epoch": 0.17094017094017094, "grad_norm": 14.877944946289062, "learning_rate": 0.00019548000000000002, "loss": 0.1442, "step": 1130 }, { "epoch": 0.17245291581574768, "grad_norm": 2.8173887729644775, "learning_rate": 0.000195448, "loss": 0.5461, "step": 1140 }, { "epoch": 0.1739656606913244, "grad_norm": 9.367515563964844, "learning_rate": 0.00019540800000000002, "loss": 0.2832, "step": 1150 }, { "epoch": 0.17547840556690114, "grad_norm": 0.34991636872291565, "learning_rate": 0.00019536800000000002, "loss": 0.1497, "step": 1160 }, { "epoch": 0.17699115044247787, "grad_norm": 0.10464385151863098, "learning_rate": 0.000195328, "loss": 0.0686, "step": 1170 }, { "epoch": 0.1785038953180546, "grad_norm": 0.8961012363433838, "learning_rate": 0.000195288, "loss": 0.0822, "step": 1180 }, { "epoch": 0.18001664019363134, "grad_norm": 8.467473983764648, "learning_rate": 0.000195248, "loss": 0.0949, "step": 1190 }, { "epoch": 0.18152938506920807, "grad_norm": 0.08059060573577881, "learning_rate": 0.00019520800000000002, "loss": 0.0552, "step": 1200 }, { "epoch": 0.18152938506920807, "eval_cer": 0.0833932493767496, "eval_loss": 0.04637393727898598, "eval_runtime": 10459.5021, "eval_samples_per_second": 2.013, "eval_steps_per_second": 0.252, "step": 1200 }, { "epoch": 0.1830421299447848, "grad_norm": 0.08795847743749619, "learning_rate": 0.000195168, "loss": 0.055, "step": 1210 }, { "epoch": 0.18455487482036154, "grad_norm": 0.10272721946239471, "learning_rate": 0.000195128, "loss": 0.0557, "step": 1220 }, { "epoch": 0.18606761969593827, "grad_norm": 0.23404774069786072, "learning_rate": 0.000195088, "loss": 0.0611, "step": 1230 }, { "epoch": 0.187580364571515, "grad_norm": 0.2968621253967285, "learning_rate": 0.00019504800000000002, "loss": 0.0817, "step": 1240 }, { "epoch": 0.18909310944709176, "grad_norm": 0.08634278923273087, "learning_rate": 0.00019500800000000001, "loss": 0.0685, "step": 1250 }, { "epoch": 0.1906058543226685, "grad_norm": 0.11241244524717331, "learning_rate": 0.000194968, "loss": 0.0563, "step": 1260 }, { "epoch": 0.19211859919824523, "grad_norm": 0.17380298674106598, "learning_rate": 0.000194928, "loss": 0.065, "step": 1270 }, { "epoch": 0.19363134407382196, "grad_norm": 0.13615791499614716, "learning_rate": 0.000194888, "loss": 0.0667, "step": 1280 }, { "epoch": 0.1951440889493987, "grad_norm": 0.0854301005601883, "learning_rate": 0.00019484800000000002, "loss": 0.0507, "step": 1290 }, { "epoch": 0.19665683382497542, "grad_norm": 0.08915933966636658, "learning_rate": 0.000194808, "loss": 0.0561, "step": 1300 }, { "epoch": 0.19816957870055216, "grad_norm": 0.09583040326833725, "learning_rate": 0.00019476800000000003, "loss": 0.0514, "step": 1310 }, { "epoch": 0.1996823235761289, "grad_norm": 0.09624961763620377, "learning_rate": 0.000194728, "loss": 0.052, "step": 1320 }, { "epoch": 0.20119506845170562, "grad_norm": 0.05612370744347572, "learning_rate": 0.00019468800000000002, "loss": 0.0471, "step": 1330 }, { "epoch": 0.20270781332728235, "grad_norm": 0.0653730109333992, "learning_rate": 0.000194648, "loss": 0.0521, "step": 1340 }, { "epoch": 0.2042205582028591, "grad_norm": 0.07432978600263596, "learning_rate": 0.000194608, "loss": 0.0577, "step": 1350 }, { "epoch": 0.20573330307843582, "grad_norm": 0.05863150209188461, "learning_rate": 0.00019456800000000003, "loss": 0.0435, "step": 1360 }, { "epoch": 0.20724604795401255, "grad_norm": 0.056969739496707916, "learning_rate": 0.000194528, "loss": 0.0502, "step": 1370 }, { "epoch": 0.20875879282958928, "grad_norm": 0.10658754408359528, "learning_rate": 0.000194488, "loss": 0.0469, "step": 1380 }, { "epoch": 0.21027153770516602, "grad_norm": 0.06535681337118149, "learning_rate": 0.000194448, "loss": 0.0519, "step": 1390 }, { "epoch": 0.21178428258074275, "grad_norm": 0.08987314254045486, "learning_rate": 0.000194408, "loss": 0.0482, "step": 1400 }, { "epoch": 0.21178428258074275, "eval_cer": 0.14607469615771385, "eval_loss": 0.04351452365517616, "eval_runtime": 10473.9712, "eval_samples_per_second": 2.01, "eval_steps_per_second": 0.251, "step": 1400 }, { "epoch": 0.21329702745631948, "grad_norm": 0.09238473325967789, "learning_rate": 0.00019436800000000002, "loss": 0.0483, "step": 1410 }, { "epoch": 0.21480977233189621, "grad_norm": 0.10443761199712753, "learning_rate": 0.000194328, "loss": 0.054, "step": 1420 }, { "epoch": 0.21632251720747295, "grad_norm": 0.0742131844162941, "learning_rate": 0.000194288, "loss": 0.0507, "step": 1430 }, { "epoch": 0.21783526208304968, "grad_norm": 0.09358492493629456, "learning_rate": 0.000194248, "loss": 0.0496, "step": 1440 }, { "epoch": 0.21934800695862644, "grad_norm": 0.07695715129375458, "learning_rate": 0.00019420800000000002, "loss": 0.046, "step": 1450 }, { "epoch": 0.22086075183420317, "grad_norm": 0.07772234827280045, "learning_rate": 0.00019416800000000002, "loss": 0.0468, "step": 1460 }, { "epoch": 0.2223734967097799, "grad_norm": 0.04500894993543625, "learning_rate": 0.000194128, "loss": 0.0428, "step": 1470 }, { "epoch": 0.22388624158535664, "grad_norm": 0.08258084207773209, "learning_rate": 0.000194088, "loss": 0.0542, "step": 1480 }, { "epoch": 0.22539898646093337, "grad_norm": 0.06530752032995224, "learning_rate": 0.000194048, "loss": 0.0477, "step": 1490 }, { "epoch": 0.2269117313365101, "grad_norm": 0.06770725548267365, "learning_rate": 0.00019400800000000002, "loss": 0.052, "step": 1500 }, { "epoch": 0.22842447621208684, "grad_norm": 0.04499737173318863, "learning_rate": 0.000193968, "loss": 0.0392, "step": 1510 }, { "epoch": 0.22993722108766357, "grad_norm": 0.0594199039041996, "learning_rate": 0.000193928, "loss": 0.0469, "step": 1520 }, { "epoch": 0.2314499659632403, "grad_norm": 0.05143499746918678, "learning_rate": 0.000193888, "loss": 0.0384, "step": 1530 }, { "epoch": 0.23296271083881703, "grad_norm": 0.05464276298880577, "learning_rate": 0.00019384800000000002, "loss": 0.0479, "step": 1540 }, { "epoch": 0.23447545571439377, "grad_norm": 0.0698809027671814, "learning_rate": 0.000193808, "loss": 0.0493, "step": 1550 }, { "epoch": 0.2359882005899705, "grad_norm": 0.059237249195575714, "learning_rate": 0.000193768, "loss": 0.0493, "step": 1560 }, { "epoch": 0.23750094546554723, "grad_norm": 0.08654357492923737, "learning_rate": 0.000193728, "loss": 0.0481, "step": 1570 }, { "epoch": 0.23901369034112396, "grad_norm": 0.19063305854797363, "learning_rate": 0.000193688, "loss": 0.051, "step": 1580 }, { "epoch": 0.2405264352167007, "grad_norm": 0.08095410466194153, "learning_rate": 0.000193648, "loss": 0.0447, "step": 1590 }, { "epoch": 0.24203918009227743, "grad_norm": 0.056007932871580124, "learning_rate": 0.000193608, "loss": 0.0431, "step": 1600 }, { "epoch": 0.24203918009227743, "eval_cer": 0.1667197881072213, "eval_loss": 0.04373455420136452, "eval_runtime": 10595.1515, "eval_samples_per_second": 1.987, "eval_steps_per_second": 0.248, "step": 1600 }, { "epoch": 0.24355192496785416, "grad_norm": 0.06981740891933441, "learning_rate": 0.00019356800000000003, "loss": 0.0442, "step": 1610 }, { "epoch": 0.2450646698434309, "grad_norm": 0.10189545899629593, "learning_rate": 0.000193528, "loss": 0.0477, "step": 1620 }, { "epoch": 0.24657741471900763, "grad_norm": 0.06565351039171219, "learning_rate": 0.00019348800000000002, "loss": 0.0532, "step": 1630 }, { "epoch": 0.2480901595945844, "grad_norm": 0.06872796267271042, "learning_rate": 0.000193448, "loss": 0.0472, "step": 1640 }, { "epoch": 0.24960290447016112, "grad_norm": 0.06040889397263527, "learning_rate": 0.000193408, "loss": 0.0463, "step": 1650 }, { "epoch": 0.2511156493457378, "grad_norm": 0.08789139986038208, "learning_rate": 0.00019336800000000002, "loss": 0.0495, "step": 1660 }, { "epoch": 0.25262839422131456, "grad_norm": 0.0869157686829567, "learning_rate": 0.00019332800000000002, "loss": 0.0491, "step": 1670 }, { "epoch": 0.2541411390968913, "grad_norm": 0.06886725127696991, "learning_rate": 0.000193288, "loss": 0.0508, "step": 1680 }, { "epoch": 0.255653883972468, "grad_norm": 0.06138046458363533, "learning_rate": 0.000193248, "loss": 0.0435, "step": 1690 }, { "epoch": 0.25716662884804475, "grad_norm": 0.05554139241576195, "learning_rate": 0.00019320800000000002, "loss": 0.0483, "step": 1700 }, { "epoch": 0.2586793737236215, "grad_norm": 0.06712419539690018, "learning_rate": 0.00019316800000000002, "loss": 0.0545, "step": 1710 }, { "epoch": 0.2601921185991982, "grad_norm": 0.07289120554924011, "learning_rate": 0.000193128, "loss": 0.0481, "step": 1720 }, { "epoch": 0.261704863474775, "grad_norm": 0.07003842294216156, "learning_rate": 0.000193088, "loss": 0.0493, "step": 1730 }, { "epoch": 0.26321760835035174, "grad_norm": 0.06333723664283752, "learning_rate": 0.000193048, "loss": 0.0536, "step": 1740 }, { "epoch": 0.26473035322592847, "grad_norm": 0.0609460324048996, "learning_rate": 0.00019300800000000002, "loss": 0.0516, "step": 1750 }, { "epoch": 0.2662430981015052, "grad_norm": 0.14176234602928162, "learning_rate": 0.000192968, "loss": 0.0522, "step": 1760 }, { "epoch": 0.26775584297708194, "grad_norm": 0.09526730328798294, "learning_rate": 0.000192928, "loss": 0.0468, "step": 1770 }, { "epoch": 0.26926858785265867, "grad_norm": 0.05794398859143257, "learning_rate": 0.000192888, "loss": 0.051, "step": 1780 }, { "epoch": 0.2707813327282354, "grad_norm": 0.07408788055181503, "learning_rate": 0.000192848, "loss": 0.0482, "step": 1790 }, { "epoch": 0.27229407760381213, "grad_norm": 0.07873456180095673, "learning_rate": 0.00019280800000000001, "loss": 0.0576, "step": 1800 }, { "epoch": 0.27229407760381213, "eval_cer": 0.28151275038111545, "eval_loss": 0.042666129767894745, "eval_runtime": 10460.0372, "eval_samples_per_second": 2.013, "eval_steps_per_second": 0.252, "step": 1800 }, { "epoch": 0.27380682247938887, "grad_norm": 0.06786733120679855, "learning_rate": 0.000192768, "loss": 0.0505, "step": 1810 }, { "epoch": 0.2753195673549656, "grad_norm": 0.090096116065979, "learning_rate": 0.00019272800000000003, "loss": 0.0458, "step": 1820 }, { "epoch": 0.27683231223054233, "grad_norm": 0.058033574372529984, "learning_rate": 0.000192688, "loss": 0.0415, "step": 1830 }, { "epoch": 0.27834505710611906, "grad_norm": 0.09522871673107147, "learning_rate": 0.00019264800000000002, "loss": 0.0456, "step": 1840 }, { "epoch": 0.2798578019816958, "grad_norm": 0.06533698737621307, "learning_rate": 0.000192608, "loss": 0.045, "step": 1850 }, { "epoch": 0.28137054685727253, "grad_norm": 0.07162319868803024, "learning_rate": 0.000192568, "loss": 0.0511, "step": 1860 }, { "epoch": 0.28288329173284926, "grad_norm": 0.06015852093696594, "learning_rate": 0.00019252800000000002, "loss": 0.0453, "step": 1870 }, { "epoch": 0.284396036608426, "grad_norm": 0.0789792612195015, "learning_rate": 0.000192488, "loss": 0.0498, "step": 1880 }, { "epoch": 0.2859087814840027, "grad_norm": 0.05619093030691147, "learning_rate": 0.000192448, "loss": 0.0454, "step": 1890 }, { "epoch": 0.28742152635957946, "grad_norm": 0.061943668872117996, "learning_rate": 0.000192408, "loss": 0.0496, "step": 1900 }, { "epoch": 0.2889342712351562, "grad_norm": 0.07192958891391754, "learning_rate": 0.00019236800000000003, "loss": 0.05, "step": 1910 }, { "epoch": 0.2904470161107329, "grad_norm": 0.07053862512111664, "learning_rate": 0.00019232800000000002, "loss": 0.0504, "step": 1920 }, { "epoch": 0.29195976098630966, "grad_norm": 0.06491555273532867, "learning_rate": 0.000192288, "loss": 0.0478, "step": 1930 }, { "epoch": 0.2934725058618864, "grad_norm": 0.06389233469963074, "learning_rate": 0.000192248, "loss": 0.0469, "step": 1940 }, { "epoch": 0.2949852507374631, "grad_norm": 0.06336333602666855, "learning_rate": 0.000192208, "loss": 0.0472, "step": 1950 }, { "epoch": 0.29649799561303986, "grad_norm": 0.06351201981306076, "learning_rate": 0.00019216800000000002, "loss": 0.0459, "step": 1960 }, { "epoch": 0.2980107404886166, "grad_norm": 0.0773550271987915, "learning_rate": 0.00019212800000000001, "loss": 0.0435, "step": 1970 }, { "epoch": 0.2995234853641933, "grad_norm": 0.07999245822429657, "learning_rate": 0.000192088, "loss": 0.051, "step": 1980 }, { "epoch": 0.30103623023977005, "grad_norm": 0.05664638802409172, "learning_rate": 0.000192048, "loss": 0.0493, "step": 1990 }, { "epoch": 0.3025489751153468, "grad_norm": 0.050149012356996536, "learning_rate": 0.00019200800000000002, "loss": 0.0491, "step": 2000 }, { "epoch": 0.3025489751153468, "eval_cer": 0.10787543886957575, "eval_loss": 0.042158011347055435, "eval_runtime": 10458.1763, "eval_samples_per_second": 2.013, "eval_steps_per_second": 0.252, "step": 2000 }, { "epoch": 0.3040617199909235, "grad_norm": 0.06383787840604782, "learning_rate": 0.00019196800000000002, "loss": 0.0421, "step": 2010 }, { "epoch": 0.30557446486650025, "grad_norm": 0.05740641430020332, "learning_rate": 0.000191928, "loss": 0.0499, "step": 2020 }, { "epoch": 0.307087209742077, "grad_norm": 0.07163075357675552, "learning_rate": 0.000191888, "loss": 0.0431, "step": 2030 }, { "epoch": 0.3085999546176537, "grad_norm": 0.05976075306534767, "learning_rate": 0.000191848, "loss": 0.0476, "step": 2040 }, { "epoch": 0.31011269949323045, "grad_norm": 0.0871894434094429, "learning_rate": 0.00019180800000000002, "loss": 0.0449, "step": 2050 }, { "epoch": 0.3116254443688072, "grad_norm": 0.07474277913570404, "learning_rate": 0.000191768, "loss": 0.0422, "step": 2060 }, { "epoch": 0.3131381892443839, "grad_norm": 0.05594407767057419, "learning_rate": 0.00019172800000000003, "loss": 0.0479, "step": 2070 }, { "epoch": 0.31465093411996065, "grad_norm": 0.06565164029598236, "learning_rate": 0.000191688, "loss": 0.0501, "step": 2080 }, { "epoch": 0.3161636789955374, "grad_norm": 0.07224603742361069, "learning_rate": 0.000191648, "loss": 0.0474, "step": 2090 }, { "epoch": 0.3176764238711141, "grad_norm": 0.07781083881855011, "learning_rate": 0.000191608, "loss": 0.0401, "step": 2100 }, { "epoch": 0.31918916874669084, "grad_norm": 0.08147955685853958, "learning_rate": 0.000191568, "loss": 0.0486, "step": 2110 }, { "epoch": 0.32070191362226763, "grad_norm": 0.05572337657213211, "learning_rate": 0.00019152800000000003, "loss": 0.0488, "step": 2120 }, { "epoch": 0.32221465849784436, "grad_norm": 0.06601813435554504, "learning_rate": 0.000191488, "loss": 0.0466, "step": 2130 }, { "epoch": 0.3237274033734211, "grad_norm": 0.057904861867427826, "learning_rate": 0.00019144800000000001, "loss": 0.0479, "step": 2140 }, { "epoch": 0.32524014824899783, "grad_norm": 0.057231709361076355, "learning_rate": 0.000191408, "loss": 0.0522, "step": 2150 }, { "epoch": 0.32675289312457456, "grad_norm": 0.08306867629289627, "learning_rate": 0.000191368, "loss": 0.0439, "step": 2160 }, { "epoch": 0.3282656380001513, "grad_norm": 0.0742512047290802, "learning_rate": 0.00019132800000000002, "loss": 0.0434, "step": 2170 }, { "epoch": 0.329778382875728, "grad_norm": 0.07260335236787796, "learning_rate": 0.000191288, "loss": 0.0505, "step": 2180 }, { "epoch": 0.33129112775130476, "grad_norm": 0.07398936152458191, "learning_rate": 0.000191248, "loss": 0.0519, "step": 2190 }, { "epoch": 0.3328038726268815, "grad_norm": 0.069728784263134, "learning_rate": 0.000191208, "loss": 0.0501, "step": 2200 }, { "epoch": 0.3328038726268815, "eval_cer": 0.07287520414693144, "eval_loss": 0.041937489062547684, "eval_runtime": 10449.7877, "eval_samples_per_second": 2.015, "eval_steps_per_second": 0.252, "step": 2200 }, { "epoch": 0.3343166175024582, "grad_norm": 0.07778773456811905, "learning_rate": 0.00019116800000000002, "loss": 0.0485, "step": 2210 }, { "epoch": 0.33582936237803496, "grad_norm": 0.08489017933607101, "learning_rate": 0.00019112800000000002, "loss": 0.047, "step": 2220 }, { "epoch": 0.3373421072536117, "grad_norm": 0.0746629610657692, "learning_rate": 0.000191088, "loss": 0.0444, "step": 2230 }, { "epoch": 0.3388548521291884, "grad_norm": 0.07858649641275406, "learning_rate": 0.000191048, "loss": 0.0537, "step": 2240 }, { "epoch": 0.34036759700476515, "grad_norm": 0.08357574045658112, "learning_rate": 0.000191008, "loss": 0.054, "step": 2250 }, { "epoch": 0.3418803418803419, "grad_norm": 0.05976574867963791, "learning_rate": 0.00019096800000000002, "loss": 0.0465, "step": 2260 }, { "epoch": 0.3433930867559186, "grad_norm": 0.07549616694450378, "learning_rate": 0.000190928, "loss": 0.0479, "step": 2270 }, { "epoch": 0.34490583163149535, "grad_norm": 0.07128783315420151, "learning_rate": 0.000190888, "loss": 0.0481, "step": 2280 }, { "epoch": 0.3464185765070721, "grad_norm": 0.05093182995915413, "learning_rate": 0.000190848, "loss": 0.039, "step": 2290 }, { "epoch": 0.3479313213826488, "grad_norm": 0.07213055342435837, "learning_rate": 0.00019080800000000002, "loss": 0.0486, "step": 2300 }, { "epoch": 0.34944406625822555, "grad_norm": 0.08296896517276764, "learning_rate": 0.00019076800000000001, "loss": 0.0436, "step": 2310 }, { "epoch": 0.3509568111338023, "grad_norm": 0.05904708430171013, "learning_rate": 0.000190728, "loss": 0.0457, "step": 2320 }, { "epoch": 0.352469556009379, "grad_norm": 0.07709085941314697, "learning_rate": 0.000190688, "loss": 0.0456, "step": 2330 }, { "epoch": 0.35398230088495575, "grad_norm": 0.061139535158872604, "learning_rate": 0.000190648, "loss": 0.0484, "step": 2340 }, { "epoch": 0.3554950457605325, "grad_norm": 0.11013538390398026, "learning_rate": 0.00019060800000000002, "loss": 0.0463, "step": 2350 }, { "epoch": 0.3570077906361092, "grad_norm": 0.04920123890042305, "learning_rate": 0.000190568, "loss": 0.0404, "step": 2360 }, { "epoch": 0.35852053551168594, "grad_norm": 0.05916327238082886, "learning_rate": 0.00019052800000000003, "loss": 0.0506, "step": 2370 }, { "epoch": 0.3600332803872627, "grad_norm": 0.08169171214103699, "learning_rate": 0.000190488, "loss": 0.0422, "step": 2380 }, { "epoch": 0.3615460252628394, "grad_norm": 0.07195686548948288, "learning_rate": 0.00019044800000000002, "loss": 0.0476, "step": 2390 }, { "epoch": 0.36305877013841614, "grad_norm": 0.06132512912154198, "learning_rate": 0.000190408, "loss": 0.0451, "step": 2400 }, { "epoch": 0.36305877013841614, "eval_cer": 0.22885396051223894, "eval_loss": 0.04164993762969971, "eval_runtime": 10444.7845, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.252, "step": 2400 }, { "epoch": 0.3645715150139929, "grad_norm": 0.06889329850673676, "learning_rate": 0.000190368, "loss": 0.0536, "step": 2410 }, { "epoch": 0.3660842598895696, "grad_norm": 0.06513672322034836, "learning_rate": 0.00019032800000000002, "loss": 0.0472, "step": 2420 }, { "epoch": 0.36759700476514634, "grad_norm": 0.06588304787874222, "learning_rate": 0.000190288, "loss": 0.046, "step": 2430 }, { "epoch": 0.3691097496407231, "grad_norm": 0.07162468135356903, "learning_rate": 0.000190248, "loss": 0.0444, "step": 2440 }, { "epoch": 0.3706224945162998, "grad_norm": 0.05831474810838699, "learning_rate": 0.000190208, "loss": 0.0448, "step": 2450 }, { "epoch": 0.37213523939187654, "grad_norm": 0.11214031279087067, "learning_rate": 0.000190168, "loss": 0.0491, "step": 2460 }, { "epoch": 0.37364798426745327, "grad_norm": 0.07672178000211716, "learning_rate": 0.00019012800000000002, "loss": 0.0489, "step": 2470 }, { "epoch": 0.37516072914303, "grad_norm": 0.07850979268550873, "learning_rate": 0.000190088, "loss": 0.047, "step": 2480 }, { "epoch": 0.37667347401860674, "grad_norm": 0.0473526194691658, "learning_rate": 0.000190048, "loss": 0.0436, "step": 2490 }, { "epoch": 0.3781862188941835, "grad_norm": 0.08313214778900146, "learning_rate": 0.000190008, "loss": 0.0457, "step": 2500 }, { "epoch": 0.37969896376976026, "grad_norm": 0.07851678878068924, "learning_rate": 0.00018996800000000002, "loss": 0.0399, "step": 2510 }, { "epoch": 0.381211708645337, "grad_norm": 0.06067463755607605, "learning_rate": 0.00018992800000000002, "loss": 0.0406, "step": 2520 }, { "epoch": 0.3827244535209137, "grad_norm": 0.07291869819164276, "learning_rate": 0.000189888, "loss": 0.0411, "step": 2530 }, { "epoch": 0.38423719839649045, "grad_norm": 0.05576318874955177, "learning_rate": 0.000189848, "loss": 0.0412, "step": 2540 }, { "epoch": 0.3857499432720672, "grad_norm": 0.05669853091239929, "learning_rate": 0.000189808, "loss": 0.0462, "step": 2550 }, { "epoch": 0.3872626881476439, "grad_norm": 0.0653596743941307, "learning_rate": 0.00018976800000000002, "loss": 0.0504, "step": 2560 }, { "epoch": 0.38877543302322065, "grad_norm": 0.07938168197870255, "learning_rate": 0.000189728, "loss": 0.0423, "step": 2570 }, { "epoch": 0.3902881778987974, "grad_norm": 0.19600598514080048, "learning_rate": 0.000189688, "loss": 0.0422, "step": 2580 }, { "epoch": 0.3918009227743741, "grad_norm": 0.08753781765699387, "learning_rate": 0.000189648, "loss": 0.0485, "step": 2590 }, { "epoch": 0.39331366764995085, "grad_norm": 0.07059615105390549, "learning_rate": 0.00018960800000000002, "loss": 0.0441, "step": 2600 }, { "epoch": 0.39331366764995085, "eval_cer": 0.12797016798729038, "eval_loss": 0.040877681225538254, "eval_runtime": 10426.9488, "eval_samples_per_second": 2.019, "eval_steps_per_second": 0.252, "step": 2600 }, { "epoch": 0.3948264125255276, "grad_norm": 0.07426866888999939, "learning_rate": 0.000189568, "loss": 0.0456, "step": 2610 }, { "epoch": 0.3963391574011043, "grad_norm": 0.05869770795106888, "learning_rate": 0.000189528, "loss": 0.047, "step": 2620 }, { "epoch": 0.39785190227668105, "grad_norm": 0.09353045374155045, "learning_rate": 0.000189488, "loss": 0.0457, "step": 2630 }, { "epoch": 0.3993646471522578, "grad_norm": 0.083396315574646, "learning_rate": 0.000189448, "loss": 0.0441, "step": 2640 }, { "epoch": 0.4008773920278345, "grad_norm": 0.0698527917265892, "learning_rate": 0.000189408, "loss": 0.0469, "step": 2650 }, { "epoch": 0.40239013690341124, "grad_norm": 0.07554033398628235, "learning_rate": 0.000189368, "loss": 0.0523, "step": 2660 }, { "epoch": 0.403902881778988, "grad_norm": 0.08026187121868134, "learning_rate": 0.00018932800000000003, "loss": 0.0492, "step": 2670 }, { "epoch": 0.4054156266545647, "grad_norm": 0.0758117213845253, "learning_rate": 0.000189288, "loss": 0.0471, "step": 2680 }, { "epoch": 0.40692837153014144, "grad_norm": 0.0716470330953598, "learning_rate": 0.00018924800000000001, "loss": 0.0401, "step": 2690 }, { "epoch": 0.4084411164057182, "grad_norm": 0.07114976644515991, "learning_rate": 0.000189208, "loss": 0.0483, "step": 2700 }, { "epoch": 0.4099538612812949, "grad_norm": 0.059242133051157, "learning_rate": 0.000189168, "loss": 0.0416, "step": 2710 }, { "epoch": 0.41146660615687164, "grad_norm": 0.07214327901601791, "learning_rate": 0.00018912800000000002, "loss": 0.0446, "step": 2720 }, { "epoch": 0.41297935103244837, "grad_norm": 0.0404672808945179, "learning_rate": 0.000189088, "loss": 0.0445, "step": 2730 }, { "epoch": 0.4144920959080251, "grad_norm": 0.06663410365581512, "learning_rate": 0.000189048, "loss": 0.0435, "step": 2740 }, { "epoch": 0.41600484078360184, "grad_norm": 0.0690486952662468, "learning_rate": 0.000189008, "loss": 0.048, "step": 2750 }, { "epoch": 0.41751758565917857, "grad_norm": 0.07034830003976822, "learning_rate": 0.00018896800000000002, "loss": 0.0423, "step": 2760 }, { "epoch": 0.4190303305347553, "grad_norm": 0.08420894294977188, "learning_rate": 0.00018892800000000002, "loss": 0.0525, "step": 2770 }, { "epoch": 0.42054307541033203, "grad_norm": 0.07617480307817459, "learning_rate": 0.000188888, "loss": 0.0492, "step": 2780 }, { "epoch": 0.42205582028590877, "grad_norm": 0.06841789186000824, "learning_rate": 0.000188848, "loss": 0.0427, "step": 2790 }, { "epoch": 0.4235685651614855, "grad_norm": 0.07013357430696487, "learning_rate": 0.000188808, "loss": 0.04, "step": 2800 }, { "epoch": 0.4235685651614855, "eval_cer": 0.26005539454405746, "eval_loss": 0.04089580848813057, "eval_runtime": 10530.3682, "eval_samples_per_second": 1.999, "eval_steps_per_second": 0.25, "step": 2800 }, { "epoch": 0.42508131003706223, "grad_norm": 0.06432001292705536, "learning_rate": 0.00018876800000000002, "loss": 0.0402, "step": 2810 }, { "epoch": 0.42659405491263896, "grad_norm": 0.06437406688928604, "learning_rate": 0.000188728, "loss": 0.0397, "step": 2820 }, { "epoch": 0.4281067997882157, "grad_norm": 0.0579422190785408, "learning_rate": 0.000188688, "loss": 0.0431, "step": 2830 }, { "epoch": 0.42961954466379243, "grad_norm": 0.0628400593996048, "learning_rate": 0.000188648, "loss": 0.0426, "step": 2840 }, { "epoch": 0.43113228953936916, "grad_norm": 0.04976367950439453, "learning_rate": 0.000188608, "loss": 0.0448, "step": 2850 }, { "epoch": 0.4326450344149459, "grad_norm": 0.07479149103164673, "learning_rate": 0.00018856800000000001, "loss": 0.0458, "step": 2860 }, { "epoch": 0.4341577792905226, "grad_norm": 0.06853318214416504, "learning_rate": 0.000188528, "loss": 0.045, "step": 2870 }, { "epoch": 0.43567052416609936, "grad_norm": 0.08534535765647888, "learning_rate": 0.00018848800000000003, "loss": 0.044, "step": 2880 }, { "epoch": 0.43718326904167615, "grad_norm": 0.05148012563586235, "learning_rate": 0.000188448, "loss": 0.0448, "step": 2890 }, { "epoch": 0.4386960139172529, "grad_norm": 0.073714479804039, "learning_rate": 0.00018840800000000002, "loss": 0.0388, "step": 2900 }, { "epoch": 0.4402087587928296, "grad_norm": 0.06875050067901611, "learning_rate": 0.000188368, "loss": 0.0476, "step": 2910 }, { "epoch": 0.44172150366840635, "grad_norm": 0.07048488408327103, "learning_rate": 0.000188328, "loss": 0.0537, "step": 2920 }, { "epoch": 0.4432342485439831, "grad_norm": 0.06159156188368797, "learning_rate": 0.00018828800000000002, "loss": 0.0523, "step": 2930 }, { "epoch": 0.4447469934195598, "grad_norm": 0.0851297378540039, "learning_rate": 0.000188248, "loss": 0.0466, "step": 2940 }, { "epoch": 0.44625973829513654, "grad_norm": 0.07920840382575989, "learning_rate": 0.000188208, "loss": 0.0434, "step": 2950 }, { "epoch": 0.4477724831707133, "grad_norm": 0.06767392158508301, "learning_rate": 0.000188168, "loss": 0.0446, "step": 2960 }, { "epoch": 0.44928522804629, "grad_norm": 0.0621979758143425, "learning_rate": 0.00018812800000000003, "loss": 0.0514, "step": 2970 }, { "epoch": 0.45079797292186674, "grad_norm": 0.06485885381698608, "learning_rate": 0.00018808800000000002, "loss": 0.0403, "step": 2980 }, { "epoch": 0.4523107177974435, "grad_norm": 0.07618974149227142, "learning_rate": 0.000188048, "loss": 0.046, "step": 2990 }, { "epoch": 0.4538234626730202, "grad_norm": 0.050627488642930984, "learning_rate": 0.000188008, "loss": 0.04, "step": 3000 }, { "epoch": 0.4538234626730202, "eval_cer": 0.027385337988253985, "eval_loss": 0.0410909466445446, "eval_runtime": 11737.0194, "eval_samples_per_second": 1.794, "eval_steps_per_second": 0.224, "step": 3000 }, { "epoch": 0.45533620754859694, "grad_norm": 0.07569224387407303, "learning_rate": 0.000187968, "loss": 0.0453, "step": 3010 }, { "epoch": 0.45684895242417367, "grad_norm": 0.06267885118722916, "learning_rate": 0.00018792800000000002, "loss": 0.0519, "step": 3020 }, { "epoch": 0.4583616972997504, "grad_norm": 0.0801217257976532, "learning_rate": 0.00018788800000000001, "loss": 0.0452, "step": 3030 }, { "epoch": 0.45987444217532714, "grad_norm": 0.06966337561607361, "learning_rate": 0.000187848, "loss": 0.0459, "step": 3040 }, { "epoch": 0.46138718705090387, "grad_norm": 0.05708028003573418, "learning_rate": 0.000187808, "loss": 0.0462, "step": 3050 }, { "epoch": 0.4628999319264806, "grad_norm": 0.06033516675233841, "learning_rate": 0.00018776800000000002, "loss": 0.0459, "step": 3060 }, { "epoch": 0.46441267680205733, "grad_norm": 0.06908197700977325, "learning_rate": 0.00018772800000000002, "loss": 0.048, "step": 3070 }, { "epoch": 0.46592542167763407, "grad_norm": 0.0723978653550148, "learning_rate": 0.000187688, "loss": 0.047, "step": 3080 }, { "epoch": 0.4674381665532108, "grad_norm": 0.06268727034330368, "learning_rate": 0.000187648, "loss": 0.0387, "step": 3090 }, { "epoch": 0.46895091142878753, "grad_norm": 0.06796183437108994, "learning_rate": 0.000187608, "loss": 0.0379, "step": 3100 }, { "epoch": 0.47046365630436426, "grad_norm": 0.08227751404047012, "learning_rate": 0.00018756800000000002, "loss": 0.0497, "step": 3110 }, { "epoch": 0.471976401179941, "grad_norm": 0.06391087174415588, "learning_rate": 0.000187528, "loss": 0.045, "step": 3120 }, { "epoch": 0.47348914605551773, "grad_norm": 0.09645809978246689, "learning_rate": 0.00018748800000000003, "loss": 0.0479, "step": 3130 }, { "epoch": 0.47500189093109446, "grad_norm": 0.07187838107347488, "learning_rate": 0.000187448, "loss": 0.0438, "step": 3140 }, { "epoch": 0.4765146358066712, "grad_norm": 0.06578271836042404, "learning_rate": 0.00018740800000000002, "loss": 0.0471, "step": 3150 }, { "epoch": 0.4780273806822479, "grad_norm": 0.06598031520843506, "learning_rate": 0.000187368, "loss": 0.0463, "step": 3160 }, { "epoch": 0.47954012555782466, "grad_norm": 0.06380560249090195, "learning_rate": 0.000187328, "loss": 0.0439, "step": 3170 }, { "epoch": 0.4810528704334014, "grad_norm": 0.05300907790660858, "learning_rate": 0.00018728800000000003, "loss": 0.0385, "step": 3180 }, { "epoch": 0.4825656153089781, "grad_norm": 0.08515879511833191, "learning_rate": 0.000187248, "loss": 0.0444, "step": 3190 }, { "epoch": 0.48407836018455486, "grad_norm": 0.0779171735048294, "learning_rate": 0.00018720800000000001, "loss": 0.0453, "step": 3200 }, { "epoch": 0.48407836018455486, "eval_cer": 0.010036246117811001, "eval_loss": 0.04116720333695412, "eval_runtime": 10575.268, "eval_samples_per_second": 1.991, "eval_steps_per_second": 0.249, "step": 3200 }, { "epoch": 0.4855911050601316, "grad_norm": 0.07719563692808151, "learning_rate": 0.000187168, "loss": 0.0516, "step": 3210 }, { "epoch": 0.4871038499357083, "grad_norm": 0.0623527429997921, "learning_rate": 0.000187128, "loss": 0.0412, "step": 3220 }, { "epoch": 0.48861659481128505, "grad_norm": 0.05286158621311188, "learning_rate": 0.00018708800000000002, "loss": 0.0433, "step": 3230 }, { "epoch": 0.4901293396868618, "grad_norm": 0.05317120626568794, "learning_rate": 0.000187048, "loss": 0.0451, "step": 3240 }, { "epoch": 0.4916420845624385, "grad_norm": 0.06447257846593857, "learning_rate": 0.000187008, "loss": 0.0552, "step": 3250 }, { "epoch": 0.49315482943801525, "grad_norm": 0.05432993173599243, "learning_rate": 0.000186968, "loss": 0.0454, "step": 3260 }, { "epoch": 0.49466757431359204, "grad_norm": 0.07853369414806366, "learning_rate": 0.00018692800000000002, "loss": 0.0513, "step": 3270 }, { "epoch": 0.4961803191891688, "grad_norm": 0.07532196491956711, "learning_rate": 0.00018688800000000002, "loss": 0.0494, "step": 3280 }, { "epoch": 0.4976930640647455, "grad_norm": 0.0591423436999321, "learning_rate": 0.000186848, "loss": 0.0406, "step": 3290 }, { "epoch": 0.49920580894032224, "grad_norm": 0.05588558688759804, "learning_rate": 0.000186808, "loss": 0.0454, "step": 3300 }, { "epoch": 0.5007185538158989, "grad_norm": 0.06208329647779465, "learning_rate": 0.000186768, "loss": 0.0379, "step": 3310 }, { "epoch": 0.5022312986914756, "grad_norm": 0.09954684972763062, "learning_rate": 0.00018672800000000002, "loss": 0.0441, "step": 3320 }, { "epoch": 0.5037440435670524, "grad_norm": 0.06522241979837418, "learning_rate": 0.000186688, "loss": 0.0435, "step": 3330 }, { "epoch": 0.5052567884426291, "grad_norm": 0.06771814823150635, "learning_rate": 0.000186648, "loss": 0.0407, "step": 3340 }, { "epoch": 0.5067695333182058, "grad_norm": 0.09186646342277527, "learning_rate": 0.000186608, "loss": 0.0468, "step": 3350 }, { "epoch": 0.5082822781937826, "grad_norm": 0.05741488188505173, "learning_rate": 0.00018656800000000002, "loss": 0.0427, "step": 3360 }, { "epoch": 0.5097950230693593, "grad_norm": 0.078957200050354, "learning_rate": 0.00018652800000000001, "loss": 0.0524, "step": 3370 }, { "epoch": 0.511307767944936, "grad_norm": 0.06480754166841507, "learning_rate": 0.000186488, "loss": 0.0491, "step": 3380 }, { "epoch": 0.5128205128205128, "grad_norm": 0.07016266882419586, "learning_rate": 0.000186448, "loss": 0.0455, "step": 3390 }, { "epoch": 0.5143332576960895, "grad_norm": 0.09549427777528763, "learning_rate": 0.000186408, "loss": 0.0435, "step": 3400 }, { "epoch": 0.5143332576960895, "eval_cer": 0.06014582453123417, "eval_loss": 0.040756821632385254, "eval_runtime": 10458.365, "eval_samples_per_second": 2.013, "eval_steps_per_second": 0.252, "step": 3400 }, { "epoch": 0.5158460025716662, "grad_norm": 0.06771855056285858, "learning_rate": 0.00018636800000000002, "loss": 0.0496, "step": 3410 }, { "epoch": 0.517358747447243, "grad_norm": 0.051270436495542526, "learning_rate": 0.000186328, "loss": 0.0376, "step": 3420 }, { "epoch": 0.5188714923228197, "grad_norm": 0.05424557998776436, "learning_rate": 0.00018628800000000003, "loss": 0.0455, "step": 3430 }, { "epoch": 0.5203842371983964, "grad_norm": 0.07000952959060669, "learning_rate": 0.000186248, "loss": 0.0494, "step": 3440 }, { "epoch": 0.5218969820739732, "grad_norm": 0.06696450710296631, "learning_rate": 0.00018620800000000002, "loss": 0.0449, "step": 3450 }, { "epoch": 0.52340972694955, "grad_norm": 0.07243742048740387, "learning_rate": 0.000186168, "loss": 0.0481, "step": 3460 }, { "epoch": 0.5249224718251267, "grad_norm": 0.07457748800516129, "learning_rate": 0.000186128, "loss": 0.0413, "step": 3470 }, { "epoch": 0.5264352167007035, "grad_norm": 0.05373325198888779, "learning_rate": 0.00018608800000000002, "loss": 0.046, "step": 3480 }, { "epoch": 0.5279479615762802, "grad_norm": 0.07769589871168137, "learning_rate": 0.000186048, "loss": 0.0443, "step": 3490 }, { "epoch": 0.5294607064518569, "grad_norm": 0.05949350818991661, "learning_rate": 0.000186008, "loss": 0.0426, "step": 3500 }, { "epoch": 0.5309734513274337, "grad_norm": 0.08557622879743576, "learning_rate": 0.000185968, "loss": 0.0436, "step": 3510 }, { "epoch": 0.5324861962030104, "grad_norm": 0.07504332065582275, "learning_rate": 0.00018592800000000003, "loss": 0.045, "step": 3520 }, { "epoch": 0.5339989410785871, "grad_norm": 0.08510497957468033, "learning_rate": 0.00018588800000000002, "loss": 0.0451, "step": 3530 }, { "epoch": 0.5355116859541639, "grad_norm": 0.06645802408456802, "learning_rate": 0.000185848, "loss": 0.0459, "step": 3540 }, { "epoch": 0.5370244308297406, "grad_norm": 0.05905970185995102, "learning_rate": 0.000185808, "loss": 0.0431, "step": 3550 }, { "epoch": 0.5385371757053173, "grad_norm": 0.059341125190258026, "learning_rate": 0.000185768, "loss": 0.0521, "step": 3560 }, { "epoch": 0.5400499205808941, "grad_norm": 0.07676515728235245, "learning_rate": 0.00018572800000000002, "loss": 0.0446, "step": 3570 }, { "epoch": 0.5415626654564708, "grad_norm": 0.05860384181141853, "learning_rate": 0.00018568800000000002, "loss": 0.041, "step": 3580 }, { "epoch": 0.5430754103320475, "grad_norm": 0.07133147865533829, "learning_rate": 0.000185648, "loss": 0.0479, "step": 3590 }, { "epoch": 0.5445881552076243, "grad_norm": 0.058478474617004395, "learning_rate": 0.000185608, "loss": 0.0447, "step": 3600 }, { "epoch": 0.5445881552076243, "eval_cer": 0.16368877753976077, "eval_loss": 0.04047335311770439, "eval_runtime": 10446.0422, "eval_samples_per_second": 2.015, "eval_steps_per_second": 0.252, "step": 3600 }, { "epoch": 0.546100900083201, "grad_norm": 0.06725309789180756, "learning_rate": 0.000185568, "loss": 0.053, "step": 3610 }, { "epoch": 0.5476136449587777, "grad_norm": 0.06334862858057022, "learning_rate": 0.00018552800000000002, "loss": 0.0451, "step": 3620 }, { "epoch": 0.5491263898343545, "grad_norm": 0.12283937633037567, "learning_rate": 0.000185488, "loss": 0.0437, "step": 3630 }, { "epoch": 0.5506391347099312, "grad_norm": 0.05931037664413452, "learning_rate": 0.000185448, "loss": 0.0431, "step": 3640 }, { "epoch": 0.5521518795855079, "grad_norm": 0.05501909554004669, "learning_rate": 0.000185408, "loss": 0.0398, "step": 3650 }, { "epoch": 0.5536646244610847, "grad_norm": 0.06066635251045227, "learning_rate": 0.00018536800000000002, "loss": 0.0497, "step": 3660 }, { "epoch": 0.5551773693366614, "grad_norm": 0.1352480947971344, "learning_rate": 0.000185328, "loss": 0.0445, "step": 3670 }, { "epoch": 0.5566901142122381, "grad_norm": 0.08712221682071686, "learning_rate": 0.000185288, "loss": 0.0485, "step": 3680 }, { "epoch": 0.5582028590878149, "grad_norm": 0.06511665135622025, "learning_rate": 0.000185248, "loss": 0.0464, "step": 3690 }, { "epoch": 0.5597156039633916, "grad_norm": 0.052760981023311615, "learning_rate": 0.000185208, "loss": 0.0417, "step": 3700 }, { "epoch": 0.5612283488389683, "grad_norm": 0.05113260820508003, "learning_rate": 0.000185168, "loss": 0.0426, "step": 3710 }, { "epoch": 0.5627410937145451, "grad_norm": 0.06565012037754059, "learning_rate": 0.000185128, "loss": 0.0397, "step": 3720 }, { "epoch": 0.5642538385901218, "grad_norm": 0.0608823299407959, "learning_rate": 0.00018508800000000003, "loss": 0.0411, "step": 3730 }, { "epoch": 0.5657665834656985, "grad_norm": 0.0670706033706665, "learning_rate": 0.000185048, "loss": 0.0495, "step": 3740 }, { "epoch": 0.5672793283412753, "grad_norm": 0.07000606507062912, "learning_rate": 0.00018500800000000001, "loss": 0.0457, "step": 3750 }, { "epoch": 0.568792073216852, "grad_norm": 0.08072007447481155, "learning_rate": 0.000184968, "loss": 0.0484, "step": 3760 }, { "epoch": 0.5703048180924287, "grad_norm": 0.06795356422662735, "learning_rate": 0.000184928, "loss": 0.0495, "step": 3770 }, { "epoch": 0.5718175629680055, "grad_norm": 0.3031274974346161, "learning_rate": 0.00018488800000000002, "loss": 0.0504, "step": 3780 }, { "epoch": 0.5733303078435822, "grad_norm": 0.05166814848780632, "learning_rate": 0.000184848, "loss": 0.0442, "step": 3790 }, { "epoch": 0.5748430527191589, "grad_norm": 0.08816450089216232, "learning_rate": 0.000184808, "loss": 0.0525, "step": 3800 }, { "epoch": 0.5748430527191589, "eval_cer": 0.09852050611143642, "eval_loss": 0.041136305779218674, "eval_runtime": 10432.1011, "eval_samples_per_second": 2.018, "eval_steps_per_second": 0.252, "step": 3800 }, { "epoch": 0.5763557975947357, "grad_norm": 0.06531400233507156, "learning_rate": 0.000184768, "loss": 0.0459, "step": 3810 }, { "epoch": 0.5778685424703124, "grad_norm": 0.07049426436424255, "learning_rate": 0.00018472800000000002, "loss": 0.0386, "step": 3820 }, { "epoch": 0.5793812873458891, "grad_norm": 0.07954803854227066, "learning_rate": 0.00018468800000000002, "loss": 0.0451, "step": 3830 }, { "epoch": 0.5808940322214659, "grad_norm": 0.07543455064296722, "learning_rate": 0.000184648, "loss": 0.0406, "step": 3840 }, { "epoch": 0.5824067770970426, "grad_norm": 0.08292882144451141, "learning_rate": 0.000184608, "loss": 0.0544, "step": 3850 }, { "epoch": 0.5839195219726193, "grad_norm": 0.05814971402287483, "learning_rate": 0.000184568, "loss": 0.0441, "step": 3860 }, { "epoch": 0.585432266848196, "grad_norm": 0.06112606078386307, "learning_rate": 0.00018452800000000002, "loss": 0.0482, "step": 3870 }, { "epoch": 0.5869450117237728, "grad_norm": 0.08487452566623688, "learning_rate": 0.000184488, "loss": 0.0446, "step": 3880 }, { "epoch": 0.5884577565993495, "grad_norm": 0.05025780200958252, "learning_rate": 0.000184448, "loss": 0.0453, "step": 3890 }, { "epoch": 0.5899705014749262, "grad_norm": 0.10276935994625092, "learning_rate": 0.000184408, "loss": 0.0427, "step": 3900 }, { "epoch": 0.591483246350503, "grad_norm": 0.11926810443401337, "learning_rate": 0.000184368, "loss": 0.0472, "step": 3910 }, { "epoch": 0.5929959912260797, "grad_norm": 0.08615875244140625, "learning_rate": 0.00018432800000000001, "loss": 0.0504, "step": 3920 }, { "epoch": 0.5945087361016564, "grad_norm": 0.05418393015861511, "learning_rate": 0.000184288, "loss": 0.0397, "step": 3930 }, { "epoch": 0.5960214809772332, "grad_norm": 0.06980731338262558, "learning_rate": 0.000184248, "loss": 0.0407, "step": 3940 }, { "epoch": 0.5975342258528099, "grad_norm": 0.07121722400188446, "learning_rate": 0.000184208, "loss": 0.0441, "step": 3950 }, { "epoch": 0.5990469707283866, "grad_norm": 0.05750627815723419, "learning_rate": 0.00018416800000000002, "loss": 0.049, "step": 3960 }, { "epoch": 0.6005597156039634, "grad_norm": 0.08207126706838608, "learning_rate": 0.000184128, "loss": 0.0475, "step": 3970 }, { "epoch": 0.6020724604795401, "grad_norm": 0.07319646328687668, "learning_rate": 0.000184088, "loss": 0.0517, "step": 3980 }, { "epoch": 0.6035852053551168, "grad_norm": 0.06762152910232544, "learning_rate": 0.000184048, "loss": 0.042, "step": 3990 }, { "epoch": 0.6050979502306936, "grad_norm": 0.05603775382041931, "learning_rate": 0.000184008, "loss": 0.0434, "step": 4000 }, { "epoch": 0.6050979502306936, "eval_cer": 0.2283245991802003, "eval_loss": 0.03986261412501335, "eval_runtime": 10464.7689, "eval_samples_per_second": 2.012, "eval_steps_per_second": 0.252, "step": 4000 }, { "epoch": 0.6066106951062703, "grad_norm": 0.05094938725233078, "learning_rate": 0.000183968, "loss": 0.0493, "step": 4010 }, { "epoch": 0.608123439981847, "grad_norm": 0.08996951580047607, "learning_rate": 0.000183928, "loss": 0.0475, "step": 4020 }, { "epoch": 0.6096361848574238, "grad_norm": 0.07369961589574814, "learning_rate": 0.00018388800000000003, "loss": 0.0441, "step": 4030 }, { "epoch": 0.6111489297330005, "grad_norm": 0.06135983020067215, "learning_rate": 0.000183848, "loss": 0.0421, "step": 4040 }, { "epoch": 0.6126616746085772, "grad_norm": 0.04601254314184189, "learning_rate": 0.000183808, "loss": 0.037, "step": 4050 }, { "epoch": 0.614174419484154, "grad_norm": 0.04949349910020828, "learning_rate": 0.000183768, "loss": 0.0424, "step": 4060 }, { "epoch": 0.6156871643597307, "grad_norm": 0.08714490383863449, "learning_rate": 0.000183728, "loss": 0.0459, "step": 4070 }, { "epoch": 0.6171999092353074, "grad_norm": 0.07733121514320374, "learning_rate": 0.00018368800000000002, "loss": 0.0423, "step": 4080 }, { "epoch": 0.6187126541108842, "grad_norm": 0.070652537047863, "learning_rate": 0.000183648, "loss": 0.0417, "step": 4090 }, { "epoch": 0.6202253989864609, "grad_norm": 0.08538975566625595, "learning_rate": 0.000183608, "loss": 0.045, "step": 4100 }, { "epoch": 0.6217381438620376, "grad_norm": 0.07866961508989334, "learning_rate": 0.000183568, "loss": 0.0435, "step": 4110 }, { "epoch": 0.6232508887376144, "grad_norm": 0.052214980125427246, "learning_rate": 0.00018352800000000002, "loss": 0.0389, "step": 4120 }, { "epoch": 0.6247636336131911, "grad_norm": 0.07548975199460983, "learning_rate": 0.00018348800000000002, "loss": 0.0406, "step": 4130 }, { "epoch": 0.6262763784887678, "grad_norm": 0.06064745783805847, "learning_rate": 0.000183448, "loss": 0.0405, "step": 4140 }, { "epoch": 0.6277891233643446, "grad_norm": 0.06255548447370529, "learning_rate": 0.000183408, "loss": 0.0426, "step": 4150 }, { "epoch": 0.6293018682399213, "grad_norm": 0.05550558492541313, "learning_rate": 0.000183368, "loss": 0.0432, "step": 4160 }, { "epoch": 0.630814613115498, "grad_norm": 0.06224781274795532, "learning_rate": 0.00018332800000000002, "loss": 0.0489, "step": 4170 }, { "epoch": 0.6323273579910748, "grad_norm": 0.04567689448595047, "learning_rate": 0.000183288, "loss": 0.0392, "step": 4180 }, { "epoch": 0.6338401028666515, "grad_norm": 0.08686509728431702, "learning_rate": 0.00018324800000000003, "loss": 0.0503, "step": 4190 }, { "epoch": 0.6353528477422282, "grad_norm": 0.039897847920656204, "learning_rate": 0.000183208, "loss": 0.0437, "step": 4200 }, { "epoch": 0.6353528477422282, "eval_cer": 0.0028697931722888917, "eval_loss": 0.03980256989598274, "eval_runtime": 10439.5254, "eval_samples_per_second": 2.017, "eval_steps_per_second": 0.252, "step": 4200 }, { "epoch": 0.636865592617805, "grad_norm": 0.07222657650709152, "learning_rate": 0.00018316800000000002, "loss": 0.0445, "step": 4210 }, { "epoch": 0.6383783374933817, "grad_norm": 0.06796406954526901, "learning_rate": 0.000183128, "loss": 0.0452, "step": 4220 }, { "epoch": 0.6398910823689585, "grad_norm": 0.07380914688110352, "learning_rate": 0.000183088, "loss": 0.0456, "step": 4230 }, { "epoch": 0.6414038272445353, "grad_norm": 0.05780802294611931, "learning_rate": 0.00018304800000000003, "loss": 0.043, "step": 4240 }, { "epoch": 0.642916572120112, "grad_norm": 0.07155787944793701, "learning_rate": 0.000183008, "loss": 0.0422, "step": 4250 }, { "epoch": 0.6444293169956887, "grad_norm": 0.06419336050748825, "learning_rate": 0.00018296800000000001, "loss": 0.0453, "step": 4260 }, { "epoch": 0.6459420618712655, "grad_norm": 0.06702402234077454, "learning_rate": 0.000182928, "loss": 0.0416, "step": 4270 }, { "epoch": 0.6474548067468422, "grad_norm": 0.062247395515441895, "learning_rate": 0.00018288800000000003, "loss": 0.0431, "step": 4280 }, { "epoch": 0.6489675516224189, "grad_norm": 0.05556045100092888, "learning_rate": 0.00018284800000000002, "loss": 0.0542, "step": 4290 }, { "epoch": 0.6504802964979957, "grad_norm": 0.07586701959371567, "learning_rate": 0.000182808, "loss": 0.0476, "step": 4300 }, { "epoch": 0.6519930413735724, "grad_norm": 0.056563302874565125, "learning_rate": 0.000182768, "loss": 0.0441, "step": 4310 }, { "epoch": 0.6535057862491491, "grad_norm": 0.08210831135511398, "learning_rate": 0.000182728, "loss": 0.0428, "step": 4320 }, { "epoch": 0.6550185311247259, "grad_norm": 0.06154036149382591, "learning_rate": 0.00018268800000000002, "loss": 0.0437, "step": 4330 }, { "epoch": 0.6565312760003026, "grad_norm": 0.06387040764093399, "learning_rate": 0.00018264800000000002, "loss": 0.0503, "step": 4340 }, { "epoch": 0.6580440208758793, "grad_norm": 0.07460694015026093, "learning_rate": 0.000182608, "loss": 0.0388, "step": 4350 }, { "epoch": 0.659556765751456, "grad_norm": 0.05871427804231644, "learning_rate": 0.000182568, "loss": 0.0409, "step": 4360 }, { "epoch": 0.6610695106270328, "grad_norm": 0.05525946244597435, "learning_rate": 0.000182528, "loss": 0.0403, "step": 4370 }, { "epoch": 0.6625822555026095, "grad_norm": 0.07400190085172653, "learning_rate": 0.00018248800000000002, "loss": 0.0544, "step": 4380 }, { "epoch": 0.6640950003781863, "grad_norm": 0.05236358568072319, "learning_rate": 0.000182448, "loss": 0.0424, "step": 4390 }, { "epoch": 0.665607745253763, "grad_norm": 0.07223962247371674, "learning_rate": 0.000182408, "loss": 0.0427, "step": 4400 }, { "epoch": 0.665607745253763, "eval_cer": 0.22895526186399429, "eval_loss": 0.039881668984889984, "eval_runtime": 10486.5948, "eval_samples_per_second": 2.008, "eval_steps_per_second": 0.251, "step": 4400 }, { "epoch": 0.6671204901293397, "grad_norm": 0.04777299240231514, "learning_rate": 0.000182368, "loss": 0.0365, "step": 4410 }, { "epoch": 0.6686332350049164, "grad_norm": 0.06789238750934601, "learning_rate": 0.00018232800000000002, "loss": 0.041, "step": 4420 }, { "epoch": 0.6701459798804932, "grad_norm": 0.07556366920471191, "learning_rate": 0.00018228800000000001, "loss": 0.0454, "step": 4430 }, { "epoch": 0.6716587247560699, "grad_norm": 0.05699057877063751, "learning_rate": 0.000182248, "loss": 0.0412, "step": 4440 }, { "epoch": 0.6731714696316466, "grad_norm": 0.06115678697824478, "learning_rate": 0.000182208, "loss": 0.0494, "step": 4450 }, { "epoch": 0.6746842145072234, "grad_norm": 0.16907750070095062, "learning_rate": 0.000182168, "loss": 0.0457, "step": 4460 }, { "epoch": 0.6761969593828001, "grad_norm": 0.23710806667804718, "learning_rate": 0.00018212800000000002, "loss": 0.0491, "step": 4470 }, { "epoch": 0.6777097042583768, "grad_norm": 0.13006287813186646, "learning_rate": 0.000182088, "loss": 0.0528, "step": 4480 }, { "epoch": 0.6792224491339536, "grad_norm": 0.24661995470523834, "learning_rate": 0.00018204800000000003, "loss": 0.043, "step": 4490 }, { "epoch": 0.6807351940095303, "grad_norm": 0.2757125198841095, "learning_rate": 0.000182008, "loss": 0.0477, "step": 4500 }, { "epoch": 0.682247938885107, "grad_norm": 0.27585530281066895, "learning_rate": 0.00018196800000000002, "loss": 0.0486, "step": 4510 }, { "epoch": 0.6837606837606838, "grad_norm": 0.10548703372478485, "learning_rate": 0.000181928, "loss": 0.0448, "step": 4520 }, { "epoch": 0.6852734286362605, "grad_norm": 0.1989259272813797, "learning_rate": 0.000181888, "loss": 0.0508, "step": 4530 }, { "epoch": 0.6867861735118372, "grad_norm": 0.10586623847484589, "learning_rate": 0.00018184800000000002, "loss": 0.0486, "step": 4540 }, { "epoch": 0.688298918387414, "grad_norm": 0.09687965363264084, "learning_rate": 0.000181808, "loss": 0.0463, "step": 4550 }, { "epoch": 0.6898116632629907, "grad_norm": 0.13362692296504974, "learning_rate": 0.000181768, "loss": 0.0441, "step": 4560 }, { "epoch": 0.6913244081385674, "grad_norm": 0.07124081254005432, "learning_rate": 0.000181728, "loss": 0.0479, "step": 4570 }, { "epoch": 0.6928371530141442, "grad_norm": 0.060886889696121216, "learning_rate": 0.00018168800000000003, "loss": 0.0425, "step": 4580 }, { "epoch": 0.6943498978897209, "grad_norm": 0.09697773307561874, "learning_rate": 0.00018164800000000002, "loss": 0.0466, "step": 4590 }, { "epoch": 0.6958626427652976, "grad_norm": 0.09655246883630753, "learning_rate": 0.00018160800000000001, "loss": 0.0423, "step": 4600 }, { "epoch": 0.6958626427652976, "eval_cer": 0.3264485475609846, "eval_loss": 0.04431215673685074, "eval_runtime": 9966.6677, "eval_samples_per_second": 2.112, "eval_steps_per_second": 0.264, "step": 4600 }, { "epoch": 0.6973753876408744, "grad_norm": 0.6920335292816162, "learning_rate": 0.000181568, "loss": 0.0612, "step": 4610 }, { "epoch": 0.6988881325164511, "grad_norm": 21.773630142211914, "learning_rate": 0.00018153600000000002, "loss": 0.3452, "step": 4620 }, { "epoch": 0.7004008773920278, "grad_norm": 0.6047945022583008, "learning_rate": 0.0001815, "loss": 0.8043, "step": 4630 }, { "epoch": 0.7019136222676046, "grad_norm": 0.30588680505752563, "learning_rate": 0.00018146000000000001, "loss": 0.094, "step": 4640 }, { "epoch": 0.7034263671431813, "grad_norm": 2.5436811447143555, "learning_rate": 0.00018142, "loss": 0.1421, "step": 4650 }, { "epoch": 0.704939112018758, "grad_norm": 3.3921713829040527, "learning_rate": 0.00018138000000000003, "loss": 0.2285, "step": 4660 }, { "epoch": 0.7064518568943348, "grad_norm": 6.751514434814453, "learning_rate": 0.00018134, "loss": 0.1609, "step": 4670 }, { "epoch": 0.7079646017699115, "grad_norm": 0.2919982075691223, "learning_rate": 0.00018130000000000002, "loss": 0.0731, "step": 4680 }, { "epoch": 0.7094773466454882, "grad_norm": 0.2757503092288971, "learning_rate": 0.00018126, "loss": 0.0553, "step": 4690 }, { "epoch": 0.710990091521065, "grad_norm": 0.12121643126010895, "learning_rate": 0.00018122, "loss": 0.0637, "step": 4700 }, { "epoch": 0.7125028363966417, "grad_norm": 0.6880851984024048, "learning_rate": 0.00018118000000000002, "loss": 0.0556, "step": 4710 }, { "epoch": 0.7140155812722184, "grad_norm": 0.17397326231002808, "learning_rate": 0.00018114, "loss": 0.0619, "step": 4720 }, { "epoch": 0.7155283261477952, "grad_norm": 0.4361652433872223, "learning_rate": 0.0001811, "loss": 0.052, "step": 4730 }, { "epoch": 0.7170410710233719, "grad_norm": 0.08802498877048492, "learning_rate": 0.00018106, "loss": 0.0531, "step": 4740 }, { "epoch": 0.7185538158989486, "grad_norm": 0.16508696973323822, "learning_rate": 0.00018102000000000003, "loss": 0.0519, "step": 4750 }, { "epoch": 0.7200665607745254, "grad_norm": 0.1359723061323166, "learning_rate": 0.00018098000000000002, "loss": 0.0559, "step": 4760 }, { "epoch": 0.7215793056501021, "grad_norm": 0.12716355919837952, "learning_rate": 0.00018093999999999999, "loss": 0.0478, "step": 4770 }, { "epoch": 0.7230920505256788, "grad_norm": 0.24563723802566528, "learning_rate": 0.0001809, "loss": 0.0508, "step": 4780 }, { "epoch": 0.7246047954012556, "grad_norm": 0.15526343882083893, "learning_rate": 0.00018086, "loss": 0.053, "step": 4790 }, { "epoch": 0.7261175402768323, "grad_norm": 0.39961257576942444, "learning_rate": 0.00018082000000000002, "loss": 0.0543, "step": 4800 }, { "epoch": 0.7261175402768323, "eval_cer": 0.8969592299120654, "eval_loss": 0.04724743589758873, "eval_runtime": 9508.4862, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.277, "step": 4800 }, { "epoch": 0.727630285152409, "grad_norm": 0.11674599349498749, "learning_rate": 0.00018078000000000001, "loss": 0.045, "step": 4810 }, { "epoch": 0.7291430300279858, "grad_norm": 0.12775878608226776, "learning_rate": 0.00018074, "loss": 0.0507, "step": 4820 }, { "epoch": 0.7306557749035625, "grad_norm": 0.21720856428146362, "learning_rate": 0.0001807, "loss": 0.0507, "step": 4830 }, { "epoch": 0.7321685197791392, "grad_norm": 0.09953787177801132, "learning_rate": 0.00018066, "loss": 0.0455, "step": 4840 }, { "epoch": 0.733681264654716, "grad_norm": 0.1652969866991043, "learning_rate": 0.00018062000000000002, "loss": 0.058, "step": 4850 }, { "epoch": 0.7351940095302927, "grad_norm": 0.15136420726776123, "learning_rate": 0.00018058, "loss": 0.0403, "step": 4860 }, { "epoch": 0.7367067544058694, "grad_norm": 0.09294873476028442, "learning_rate": 0.00018054, "loss": 0.0454, "step": 4870 }, { "epoch": 0.7382194992814461, "grad_norm": 0.06313528120517731, "learning_rate": 0.0001805, "loss": 0.0486, "step": 4880 }, { "epoch": 0.7397322441570229, "grad_norm": 0.10854914039373398, "learning_rate": 0.00018046000000000002, "loss": 0.0419, "step": 4890 }, { "epoch": 0.7412449890325996, "grad_norm": 0.08302963525056839, "learning_rate": 0.00018042, "loss": 0.0447, "step": 4900 }, { "epoch": 0.7427577339081763, "grad_norm": 0.0761631429195404, "learning_rate": 0.00018038, "loss": 0.0446, "step": 4910 }, { "epoch": 0.7442704787837531, "grad_norm": 0.10130470246076584, "learning_rate": 0.00018034, "loss": 0.045, "step": 4920 }, { "epoch": 0.7457832236593298, "grad_norm": 0.18436622619628906, "learning_rate": 0.0001803, "loss": 0.0429, "step": 4930 }, { "epoch": 0.7472959685349065, "grad_norm": 0.08756496757268906, "learning_rate": 0.00018026, "loss": 0.0444, "step": 4940 }, { "epoch": 0.7488087134104833, "grad_norm": 0.0750514343380928, "learning_rate": 0.00018022, "loss": 0.0507, "step": 4950 }, { "epoch": 0.75032145828606, "grad_norm": 0.07460404187440872, "learning_rate": 0.00018018000000000003, "loss": 0.0397, "step": 4960 }, { "epoch": 0.7518342031616367, "grad_norm": 0.12696300446987152, "learning_rate": 0.00018014, "loss": 0.0412, "step": 4970 }, { "epoch": 0.7533469480372135, "grad_norm": 0.09411120414733887, "learning_rate": 0.00018010000000000001, "loss": 0.0431, "step": 4980 }, { "epoch": 0.7548596929127902, "grad_norm": 0.08611701428890228, "learning_rate": 0.00018006, "loss": 0.041, "step": 4990 }, { "epoch": 0.756372437788367, "grad_norm": 0.07411106675863266, "learning_rate": 0.00018002, "loss": 0.0448, "step": 5000 }, { "epoch": 0.756372437788367, "eval_cer": 0.9283299113242558, "eval_loss": 0.0398402214050293, "eval_runtime": 9972.2961, "eval_samples_per_second": 2.111, "eval_steps_per_second": 0.264, "step": 5000 }, { "epoch": 0.7578851826639438, "grad_norm": 0.06552145630121231, "learning_rate": 0.00017998000000000002, "loss": 0.0411, "step": 5010 }, { "epoch": 0.7593979275395205, "grad_norm": 0.14544987678527832, "learning_rate": 0.00017994000000000002, "loss": 0.0401, "step": 5020 }, { "epoch": 0.7609106724150972, "grad_norm": 0.06693132221698761, "learning_rate": 0.0001799, "loss": 0.045, "step": 5030 }, { "epoch": 0.762423417290674, "grad_norm": 0.08100226521492004, "learning_rate": 0.00017986, "loss": 0.0478, "step": 5040 }, { "epoch": 0.7639361621662507, "grad_norm": 0.10020666569471359, "learning_rate": 0.00017982000000000002, "loss": 0.0484, "step": 5050 }, { "epoch": 0.7654489070418274, "grad_norm": 0.055785536766052246, "learning_rate": 0.00017978000000000002, "loss": 0.0423, "step": 5060 }, { "epoch": 0.7669616519174042, "grad_norm": 0.08791428059339523, "learning_rate": 0.00017974, "loss": 0.0433, "step": 5070 }, { "epoch": 0.7684743967929809, "grad_norm": 0.10156507045030594, "learning_rate": 0.0001797, "loss": 0.0447, "step": 5080 }, { "epoch": 0.7699871416685576, "grad_norm": 0.1160702183842659, "learning_rate": 0.00017966, "loss": 0.0388, "step": 5090 }, { "epoch": 0.7714998865441344, "grad_norm": 0.08716849237680435, "learning_rate": 0.00017962000000000002, "loss": 0.0492, "step": 5100 }, { "epoch": 0.7730126314197111, "grad_norm": 0.046968474984169006, "learning_rate": 0.00017958, "loss": 0.0434, "step": 5110 }, { "epoch": 0.7745253762952878, "grad_norm": 0.06234806030988693, "learning_rate": 0.00017954000000000003, "loss": 0.0504, "step": 5120 }, { "epoch": 0.7760381211708646, "grad_norm": 0.102174311876297, "learning_rate": 0.0001795, "loss": 0.044, "step": 5130 }, { "epoch": 0.7775508660464413, "grad_norm": 0.0620570033788681, "learning_rate": 0.00017946, "loss": 0.0386, "step": 5140 }, { "epoch": 0.779063610922018, "grad_norm": 0.057656314224004745, "learning_rate": 0.00017942, "loss": 0.043, "step": 5150 }, { "epoch": 0.7805763557975948, "grad_norm": 0.08451346307992935, "learning_rate": 0.00017938, "loss": 0.0452, "step": 5160 }, { "epoch": 0.7820891006731715, "grad_norm": 0.09557165950536728, "learning_rate": 0.00017934000000000003, "loss": 0.0437, "step": 5170 }, { "epoch": 0.7836018455487482, "grad_norm": 0.12275496870279312, "learning_rate": 0.0001793, "loss": 0.0427, "step": 5180 }, { "epoch": 0.785114590424325, "grad_norm": 0.3277435600757599, "learning_rate": 0.00017926000000000002, "loss": 0.045, "step": 5190 }, { "epoch": 0.7866273352999017, "grad_norm": 0.12806734442710876, "learning_rate": 0.00017922, "loss": 0.0383, "step": 5200 }, { "epoch": 0.7866273352999017, "eval_cer": 0.8426215554451947, "eval_loss": 0.03898792341351509, "eval_runtime": 10404.4584, "eval_samples_per_second": 2.023, "eval_steps_per_second": 0.253, "step": 5200 }, { "epoch": 0.7881400801754784, "grad_norm": 0.07969816774129868, "learning_rate": 0.00017918, "loss": 0.0474, "step": 5210 }, { "epoch": 0.7896528250510552, "grad_norm": 0.20492368936538696, "learning_rate": 0.00017914000000000002, "loss": 0.0423, "step": 5220 }, { "epoch": 0.7911655699266319, "grad_norm": 0.0960281640291214, "learning_rate": 0.0001791, "loss": 0.0392, "step": 5230 }, { "epoch": 0.7926783148022086, "grad_norm": 0.16566351056098938, "learning_rate": 0.00017906, "loss": 0.0415, "step": 5240 }, { "epoch": 0.7941910596777854, "grad_norm": 0.12343327701091766, "learning_rate": 0.00017902, "loss": 0.0439, "step": 5250 }, { "epoch": 0.7957038045533621, "grad_norm": 0.0732201486825943, "learning_rate": 0.00017898000000000002, "loss": 0.0462, "step": 5260 }, { "epoch": 0.7972165494289388, "grad_norm": 0.07991164177656174, "learning_rate": 0.00017894000000000002, "loss": 0.0412, "step": 5270 }, { "epoch": 0.7987292943045156, "grad_norm": 0.07868771255016327, "learning_rate": 0.0001789, "loss": 0.0458, "step": 5280 }, { "epoch": 0.8002420391800923, "grad_norm": 0.07392987608909607, "learning_rate": 0.00017886, "loss": 0.0489, "step": 5290 }, { "epoch": 0.801754784055669, "grad_norm": 0.08330372720956802, "learning_rate": 0.00017882, "loss": 0.0448, "step": 5300 }, { "epoch": 0.8032675289312458, "grad_norm": 0.06118497997522354, "learning_rate": 0.00017878000000000002, "loss": 0.0406, "step": 5310 }, { "epoch": 0.8047802738068225, "grad_norm": 0.14288772642612457, "learning_rate": 0.00017874, "loss": 0.0439, "step": 5320 }, { "epoch": 0.8062930186823992, "grad_norm": 0.06868502497673035, "learning_rate": 0.0001787, "loss": 0.0439, "step": 5330 }, { "epoch": 0.807805763557976, "grad_norm": 0.08165542781352997, "learning_rate": 0.00017866, "loss": 0.0449, "step": 5340 }, { "epoch": 0.8093185084335527, "grad_norm": 0.08748511224985123, "learning_rate": 0.00017862000000000002, "loss": 0.0455, "step": 5350 }, { "epoch": 0.8108312533091294, "grad_norm": 0.0799604058265686, "learning_rate": 0.00017858000000000001, "loss": 0.0466, "step": 5360 }, { "epoch": 0.8123439981847062, "grad_norm": 0.09606848657131195, "learning_rate": 0.00017854, "loss": 0.0452, "step": 5370 }, { "epoch": 0.8138567430602829, "grad_norm": 0.07232715934515, "learning_rate": 0.0001785, "loss": 0.0426, "step": 5380 }, { "epoch": 0.8153694879358596, "grad_norm": 0.07278240472078323, "learning_rate": 0.00017846, "loss": 0.0468, "step": 5390 }, { "epoch": 0.8168822328114363, "grad_norm": 0.06568820029497147, "learning_rate": 0.00017842000000000002, "loss": 0.0407, "step": 5400 }, { "epoch": 0.8168822328114363, "eval_cer": 0.9304918304165957, "eval_loss": 0.039248276501894, "eval_runtime": 10433.9841, "eval_samples_per_second": 2.018, "eval_steps_per_second": 0.252, "step": 5400 }, { "epoch": 0.8183949776870131, "grad_norm": 0.08667409420013428, "learning_rate": 0.00017838, "loss": 0.0504, "step": 5410 }, { "epoch": 0.8199077225625898, "grad_norm": 0.0701778307557106, "learning_rate": 0.00017834000000000003, "loss": 0.0425, "step": 5420 }, { "epoch": 0.8214204674381665, "grad_norm": 0.07078663259744644, "learning_rate": 0.0001783, "loss": 0.0456, "step": 5430 }, { "epoch": 0.8229332123137433, "grad_norm": 0.08540530502796173, "learning_rate": 0.00017826000000000002, "loss": 0.0437, "step": 5440 }, { "epoch": 0.82444595718932, "grad_norm": 0.044258490204811096, "learning_rate": 0.00017822, "loss": 0.0373, "step": 5450 }, { "epoch": 0.8259587020648967, "grad_norm": 0.08837467432022095, "learning_rate": 0.00017818, "loss": 0.0418, "step": 5460 }, { "epoch": 0.8274714469404735, "grad_norm": 0.06399261206388474, "learning_rate": 0.00017814000000000003, "loss": 0.0461, "step": 5470 }, { "epoch": 0.8289841918160502, "grad_norm": 0.07160426676273346, "learning_rate": 0.0001781, "loss": 0.0384, "step": 5480 }, { "epoch": 0.8304969366916269, "grad_norm": 0.06335125118494034, "learning_rate": 0.00017806, "loss": 0.04, "step": 5490 }, { "epoch": 0.8320096815672037, "grad_norm": 0.10239727795124054, "learning_rate": 0.00017802, "loss": 0.0396, "step": 5500 }, { "epoch": 0.8335224264427804, "grad_norm": 0.06797724217176437, "learning_rate": 0.00017798, "loss": 0.0406, "step": 5510 }, { "epoch": 0.8350351713183571, "grad_norm": 0.08448281139135361, "learning_rate": 0.00017794000000000002, "loss": 0.0489, "step": 5520 }, { "epoch": 0.8365479161939339, "grad_norm": 0.0817868560552597, "learning_rate": 0.0001779, "loss": 0.0437, "step": 5530 }, { "epoch": 0.8380606610695106, "grad_norm": 0.12232506275177002, "learning_rate": 0.00017786, "loss": 0.0475, "step": 5540 }, { "epoch": 0.8395734059450873, "grad_norm": 0.0839553102850914, "learning_rate": 0.00017782, "loss": 0.0447, "step": 5550 }, { "epoch": 0.8410861508206641, "grad_norm": 0.07315023243427277, "learning_rate": 0.00017778000000000002, "loss": 0.0441, "step": 5560 }, { "epoch": 0.8425988956962408, "grad_norm": 0.07943390309810638, "learning_rate": 0.00017774000000000002, "loss": 0.0457, "step": 5570 }, { "epoch": 0.8441116405718175, "grad_norm": 0.07185439020395279, "learning_rate": 0.0001777, "loss": 0.0429, "step": 5580 }, { "epoch": 0.8456243854473943, "grad_norm": 0.06304585933685303, "learning_rate": 0.00017766, "loss": 0.046, "step": 5590 }, { "epoch": 0.847137130322971, "grad_norm": 0.07005342841148376, "learning_rate": 0.00017762, "loss": 0.0359, "step": 5600 }, { "epoch": 0.847137130322971, "eval_cer": 0.5003496132017898, "eval_loss": 0.038213107734918594, "eval_runtime": 10454.3437, "eval_samples_per_second": 2.014, "eval_steps_per_second": 0.252, "step": 5600 }, { "epoch": 0.8486498751985477, "grad_norm": 0.08005109429359436, "learning_rate": 0.00017758000000000002, "loss": 0.0491, "step": 5610 }, { "epoch": 0.8501626200741245, "grad_norm": 0.07554598152637482, "learning_rate": 0.00017754, "loss": 0.0384, "step": 5620 }, { "epoch": 0.8516753649497012, "grad_norm": 0.08396964520215988, "learning_rate": 0.0001775, "loss": 0.0439, "step": 5630 }, { "epoch": 0.8531881098252779, "grad_norm": 0.08719771355390549, "learning_rate": 0.00017746, "loss": 0.0417, "step": 5640 }, { "epoch": 0.8547008547008547, "grad_norm": 0.09563528001308441, "learning_rate": 0.00017742000000000002, "loss": 0.0456, "step": 5650 }, { "epoch": 0.8562135995764314, "grad_norm": 0.07019315659999847, "learning_rate": 0.00017738, "loss": 0.0394, "step": 5660 }, { "epoch": 0.8577263444520081, "grad_norm": 0.06756678968667984, "learning_rate": 0.00017734, "loss": 0.046, "step": 5670 }, { "epoch": 0.8592390893275849, "grad_norm": 0.06660816073417664, "learning_rate": 0.0001773, "loss": 0.0415, "step": 5680 }, { "epoch": 0.8607518342031616, "grad_norm": 0.10737419873476028, "learning_rate": 0.00017726, "loss": 0.0402, "step": 5690 }, { "epoch": 0.8622645790787383, "grad_norm": 0.06818167865276337, "learning_rate": 0.00017722000000000001, "loss": 0.039, "step": 5700 }, { "epoch": 0.8637773239543151, "grad_norm": 0.05077315866947174, "learning_rate": 0.00017718, "loss": 0.0376, "step": 5710 }, { "epoch": 0.8652900688298918, "grad_norm": 0.08248795568943024, "learning_rate": 0.00017714000000000003, "loss": 0.0427, "step": 5720 }, { "epoch": 0.8668028137054685, "grad_norm": 0.06273633241653442, "learning_rate": 0.0001771, "loss": 0.0405, "step": 5730 }, { "epoch": 0.8683155585810453, "grad_norm": 0.11920665949583054, "learning_rate": 0.00017706000000000002, "loss": 0.0416, "step": 5740 }, { "epoch": 0.869828303456622, "grad_norm": 0.061835162341594696, "learning_rate": 0.00017702, "loss": 0.0456, "step": 5750 }, { "epoch": 0.8713410483321987, "grad_norm": 0.06891065835952759, "learning_rate": 0.00017698, "loss": 0.0435, "step": 5760 }, { "epoch": 0.8728537932077756, "grad_norm": 0.06323794275522232, "learning_rate": 0.00017694000000000002, "loss": 0.0424, "step": 5770 }, { "epoch": 0.8743665380833523, "grad_norm": 0.08218410611152649, "learning_rate": 0.0001769, "loss": 0.0428, "step": 5780 }, { "epoch": 0.875879282958929, "grad_norm": 0.05943075567483902, "learning_rate": 0.00017686, "loss": 0.0373, "step": 5790 }, { "epoch": 0.8773920278345058, "grad_norm": 0.09316141158342361, "learning_rate": 0.00017682, "loss": 0.0436, "step": 5800 }, { "epoch": 0.8773920278345058, "eval_cer": 0.5988355286077488, "eval_loss": 0.0380551740527153, "eval_runtime": 10439.6932, "eval_samples_per_second": 2.017, "eval_steps_per_second": 0.252, "step": 5800 }, { "epoch": 0.8789047727100825, "grad_norm": 0.06791754812002182, "learning_rate": 0.00017678000000000003, "loss": 0.0424, "step": 5810 }, { "epoch": 0.8804175175856592, "grad_norm": 0.06572896242141724, "learning_rate": 0.00017674000000000002, "loss": 0.0446, "step": 5820 }, { "epoch": 0.881930262461236, "grad_norm": 0.07208286970853806, "learning_rate": 0.00017669999999999999, "loss": 0.0438, "step": 5830 }, { "epoch": 0.8834430073368127, "grad_norm": 0.08518756181001663, "learning_rate": 0.00017666, "loss": 0.0401, "step": 5840 }, { "epoch": 0.8849557522123894, "grad_norm": 0.060736026614904404, "learning_rate": 0.00017662, "loss": 0.0393, "step": 5850 }, { "epoch": 0.8864684970879662, "grad_norm": 0.0627061128616333, "learning_rate": 0.00017658000000000002, "loss": 0.0358, "step": 5860 }, { "epoch": 0.8879812419635429, "grad_norm": 0.06178157031536102, "learning_rate": 0.00017654000000000001, "loss": 0.0467, "step": 5870 }, { "epoch": 0.8894939868391196, "grad_norm": 0.0688227042555809, "learning_rate": 0.0001765, "loss": 0.0415, "step": 5880 }, { "epoch": 0.8910067317146964, "grad_norm": 0.06773985177278519, "learning_rate": 0.00017646, "loss": 0.0354, "step": 5890 }, { "epoch": 0.8925194765902731, "grad_norm": 0.09130257368087769, "learning_rate": 0.00017642, "loss": 0.0414, "step": 5900 }, { "epoch": 0.8940322214658498, "grad_norm": 0.06815651059150696, "learning_rate": 0.00017638000000000002, "loss": 0.0495, "step": 5910 }, { "epoch": 0.8955449663414266, "grad_norm": 0.07239062339067459, "learning_rate": 0.00017634, "loss": 0.0459, "step": 5920 }, { "epoch": 0.8970577112170033, "grad_norm": 0.08951979130506516, "learning_rate": 0.0001763, "loss": 0.047, "step": 5930 }, { "epoch": 0.89857045609258, "grad_norm": 0.07267329841852188, "learning_rate": 0.00017626, "loss": 0.0384, "step": 5940 }, { "epoch": 0.9000832009681567, "grad_norm": 0.06272245943546295, "learning_rate": 0.00017622000000000002, "loss": 0.0373, "step": 5950 }, { "epoch": 0.9015959458437335, "grad_norm": 0.07484642416238785, "learning_rate": 0.00017618, "loss": 0.0445, "step": 5960 }, { "epoch": 0.9031086907193102, "grad_norm": 0.06894571334123611, "learning_rate": 0.00017614, "loss": 0.0418, "step": 5970 }, { "epoch": 0.904621435594887, "grad_norm": 0.07352825254201889, "learning_rate": 0.0001761, "loss": 0.0361, "step": 5980 }, { "epoch": 0.9061341804704637, "grad_norm": 0.07955580949783325, "learning_rate": 0.00017606, "loss": 0.0418, "step": 5990 }, { "epoch": 0.9076469253460404, "grad_norm": 0.057830698788166046, "learning_rate": 0.00017602, "loss": 0.0359, "step": 6000 }, { "epoch": 0.9076469253460404, "eval_cer": 0.5058427407698408, "eval_loss": 0.038296379148960114, "eval_runtime": 10426.1739, "eval_samples_per_second": 2.019, "eval_steps_per_second": 0.252, "step": 6000 }, { "epoch": 0.9091596702216171, "grad_norm": 0.08560307323932648, "learning_rate": 0.00017598, "loss": 0.0465, "step": 6010 }, { "epoch": 0.9106724150971939, "grad_norm": 0.06908106803894043, "learning_rate": 0.00017594000000000003, "loss": 0.0469, "step": 6020 }, { "epoch": 0.9121851599727706, "grad_norm": 0.058405641466379166, "learning_rate": 0.0001759, "loss": 0.0459, "step": 6030 }, { "epoch": 0.9136979048483473, "grad_norm": 0.06696103513240814, "learning_rate": 0.00017586000000000001, "loss": 0.0389, "step": 6040 }, { "epoch": 0.9152106497239241, "grad_norm": 0.06927672773599625, "learning_rate": 0.00017582, "loss": 0.0369, "step": 6050 }, { "epoch": 0.9167233945995008, "grad_norm": 0.11847919970750809, "learning_rate": 0.00017578, "loss": 0.0379, "step": 6060 }, { "epoch": 0.9182361394750775, "grad_norm": 0.06731213629245758, "learning_rate": 0.00017574000000000002, "loss": 0.0492, "step": 6070 }, { "epoch": 0.9197488843506543, "grad_norm": 0.06238566339015961, "learning_rate": 0.0001757, "loss": 0.0351, "step": 6080 }, { "epoch": 0.921261629226231, "grad_norm": 0.07023432850837708, "learning_rate": 0.00017566, "loss": 0.0418, "step": 6090 }, { "epoch": 0.9227743741018077, "grad_norm": 0.07269687950611115, "learning_rate": 0.00017562, "loss": 0.0473, "step": 6100 }, { "epoch": 0.9242871189773845, "grad_norm": 0.0714830756187439, "learning_rate": 0.00017558000000000002, "loss": 0.0419, "step": 6110 }, { "epoch": 0.9257998638529612, "grad_norm": 0.06455916166305542, "learning_rate": 0.00017554000000000002, "loss": 0.0386, "step": 6120 }, { "epoch": 0.9273126087285379, "grad_norm": 0.0797223374247551, "learning_rate": 0.0001755, "loss": 0.0425, "step": 6130 }, { "epoch": 0.9288253536041147, "grad_norm": 0.08360251039266586, "learning_rate": 0.00017546, "loss": 0.0414, "step": 6140 }, { "epoch": 0.9303380984796914, "grad_norm": 0.06491956114768982, "learning_rate": 0.00017542, "loss": 0.0367, "step": 6150 }, { "epoch": 0.9318508433552681, "grad_norm": 0.06236764043569565, "learning_rate": 0.00017538000000000002, "loss": 0.0514, "step": 6160 }, { "epoch": 0.9333635882308449, "grad_norm": 0.08555632829666138, "learning_rate": 0.00017534, "loss": 0.041, "step": 6170 }, { "epoch": 0.9348763331064216, "grad_norm": 0.08949322998523712, "learning_rate": 0.0001753, "loss": 0.0462, "step": 6180 }, { "epoch": 0.9363890779819983, "grad_norm": 0.07832244038581848, "learning_rate": 0.00017526, "loss": 0.0471, "step": 6190 }, { "epoch": 0.9379018228575751, "grad_norm": 0.06077546253800392, "learning_rate": 0.00017522000000000002, "loss": 0.0457, "step": 6200 }, { "epoch": 0.9379018228575751, "eval_cer": 0.3344013213649492, "eval_loss": 0.03830147907137871, "eval_runtime": 10461.8882, "eval_samples_per_second": 2.012, "eval_steps_per_second": 0.252, "step": 6200 }, { "epoch": 0.9394145677331518, "grad_norm": 0.048287175595760345, "learning_rate": 0.00017518, "loss": 0.0393, "step": 6210 }, { "epoch": 0.9409273126087285, "grad_norm": 0.08072841167449951, "learning_rate": 0.00017514, "loss": 0.0447, "step": 6220 }, { "epoch": 0.9424400574843053, "grad_norm": 0.07255307585000992, "learning_rate": 0.0001751, "loss": 0.0492, "step": 6230 }, { "epoch": 0.943952802359882, "grad_norm": 0.05136171355843544, "learning_rate": 0.00017506, "loss": 0.0438, "step": 6240 }, { "epoch": 0.9454655472354587, "grad_norm": 0.079404316842556, "learning_rate": 0.00017502000000000001, "loss": 0.0383, "step": 6250 }, { "epoch": 0.9469782921110355, "grad_norm": 0.10744167119264603, "learning_rate": 0.00017498, "loss": 0.0406, "step": 6260 }, { "epoch": 0.9484910369866122, "grad_norm": 0.09439695626497269, "learning_rate": 0.00017494, "loss": 0.0448, "step": 6270 }, { "epoch": 0.9500037818621889, "grad_norm": 0.07746788114309311, "learning_rate": 0.00017490000000000002, "loss": 0.0425, "step": 6280 }, { "epoch": 0.9515165267377657, "grad_norm": 0.161416694521904, "learning_rate": 0.00017486, "loss": 0.04, "step": 6290 }, { "epoch": 0.9530292716133424, "grad_norm": 0.05279407650232315, "learning_rate": 0.00017482, "loss": 0.0387, "step": 6300 }, { "epoch": 0.9545420164889191, "grad_norm": 0.06324402987957001, "learning_rate": 0.00017478, "loss": 0.0425, "step": 6310 }, { "epoch": 0.9560547613644959, "grad_norm": 0.08716294914484024, "learning_rate": 0.00017474000000000002, "loss": 0.0436, "step": 6320 }, { "epoch": 0.9575675062400726, "grad_norm": 0.08212625980377197, "learning_rate": 0.00017470000000000002, "loss": 0.0445, "step": 6330 }, { "epoch": 0.9590802511156493, "grad_norm": 0.08856002241373062, "learning_rate": 0.00017466, "loss": 0.0385, "step": 6340 }, { "epoch": 0.960592995991226, "grad_norm": 0.08907803148031235, "learning_rate": 0.00017462, "loss": 0.0451, "step": 6350 }, { "epoch": 0.9621057408668028, "grad_norm": 0.053175825625658035, "learning_rate": 0.00017458, "loss": 0.0428, "step": 6360 }, { "epoch": 0.9636184857423795, "grad_norm": 0.055600494146347046, "learning_rate": 0.00017454000000000002, "loss": 0.047, "step": 6370 }, { "epoch": 0.9651312306179562, "grad_norm": 0.10455228388309479, "learning_rate": 0.0001745, "loss": 0.0517, "step": 6380 }, { "epoch": 0.966643975493533, "grad_norm": 0.11780910938978195, "learning_rate": 0.00017446, "loss": 0.0414, "step": 6390 }, { "epoch": 0.9681567203691097, "grad_norm": 0.12388743460178375, "learning_rate": 0.00017442, "loss": 0.0438, "step": 6400 }, { "epoch": 0.9681567203691097, "eval_cer": 0.5869913004375724, "eval_loss": 0.03873522952198982, "eval_runtime": 10437.6142, "eval_samples_per_second": 2.017, "eval_steps_per_second": 0.252, "step": 6400 }, { "epoch": 0.9696694652446864, "grad_norm": 0.07916050404310226, "learning_rate": 0.00017438000000000002, "loss": 0.0402, "step": 6410 }, { "epoch": 0.9711822101202632, "grad_norm": 0.05646761879324913, "learning_rate": 0.00017434000000000001, "loss": 0.0425, "step": 6420 }, { "epoch": 0.9726949549958399, "grad_norm": 0.08374381810426712, "learning_rate": 0.0001743, "loss": 0.041, "step": 6430 }, { "epoch": 0.9742076998714166, "grad_norm": 0.06789222359657288, "learning_rate": 0.00017426, "loss": 0.0391, "step": 6440 }, { "epoch": 0.9757204447469934, "grad_norm": 0.0788172036409378, "learning_rate": 0.00017422, "loss": 0.0449, "step": 6450 }, { "epoch": 0.9772331896225701, "grad_norm": 0.1257173717021942, "learning_rate": 0.00017418000000000002, "loss": 0.0484, "step": 6460 }, { "epoch": 0.9787459344981468, "grad_norm": 0.05888710170984268, "learning_rate": 0.00017414, "loss": 0.0387, "step": 6470 }, { "epoch": 0.9802586793737236, "grad_norm": 0.07102910429239273, "learning_rate": 0.00017410000000000003, "loss": 0.0386, "step": 6480 }, { "epoch": 0.9817714242493003, "grad_norm": 0.058048397302627563, "learning_rate": 0.00017406, "loss": 0.0415, "step": 6490 }, { "epoch": 0.983284169124877, "grad_norm": 0.07222626358270645, "learning_rate": 0.00017402000000000002, "loss": 0.0378, "step": 6500 }, { "epoch": 0.9847969140004538, "grad_norm": 0.06445878744125366, "learning_rate": 0.00017398, "loss": 0.0409, "step": 6510 }, { "epoch": 0.9863096588760305, "grad_norm": 0.09191201627254486, "learning_rate": 0.00017394, "loss": 0.0414, "step": 6520 }, { "epoch": 0.9878224037516072, "grad_norm": 0.08073204010725021, "learning_rate": 0.00017390000000000003, "loss": 0.0404, "step": 6530 }, { "epoch": 0.9893351486271841, "grad_norm": 0.08427068591117859, "learning_rate": 0.00017386, "loss": 0.0398, "step": 6540 }, { "epoch": 0.9908478935027608, "grad_norm": 0.19870494306087494, "learning_rate": 0.00017382, "loss": 0.0388, "step": 6550 }, { "epoch": 0.9923606383783375, "grad_norm": 0.34985288977622986, "learning_rate": 0.00017378, "loss": 0.051, "step": 6560 }, { "epoch": 0.9938733832539143, "grad_norm": 0.12121633440256119, "learning_rate": 0.00017374000000000003, "loss": 0.0385, "step": 6570 }, { "epoch": 0.995386128129491, "grad_norm": 0.140520840883255, "learning_rate": 0.00017370000000000002, "loss": 0.0417, "step": 6580 }, { "epoch": 0.9968988730050677, "grad_norm": 0.06655796617269516, "learning_rate": 0.00017366, "loss": 0.0394, "step": 6590 }, { "epoch": 0.9984116178806445, "grad_norm": 0.07498542964458466, "learning_rate": 0.00017362, "loss": 0.0419, "step": 6600 }, { "epoch": 0.9984116178806445, "eval_cer": 0.25282902555511905, "eval_loss": 0.038411665707826614, "eval_runtime": 10433.3935, "eval_samples_per_second": 2.018, "eval_steps_per_second": 0.252, "step": 6600 }, { "epoch": 0.9999243627562212, "grad_norm": 0.25646254420280457, "learning_rate": 0.00017358, "loss": 0.039, "step": 6610 }, { "epoch": 1.0014371076317978, "grad_norm": 0.07744245231151581, "learning_rate": 0.00017354000000000002, "loss": 0.0371, "step": 6620 }, { "epoch": 1.0029498525073746, "grad_norm": 0.11968632787466049, "learning_rate": 0.00017350000000000002, "loss": 0.0303, "step": 6630 }, { "epoch": 1.0044625973829513, "grad_norm": 0.07235859334468842, "learning_rate": 0.00017346, "loss": 0.0387, "step": 6640 }, { "epoch": 1.005975342258528, "grad_norm": 0.12598702311515808, "learning_rate": 0.00017342, "loss": 0.0355, "step": 6650 }, { "epoch": 1.0074880871341048, "grad_norm": 0.10832694917917252, "learning_rate": 0.00017338, "loss": 0.0297, "step": 6660 }, { "epoch": 1.0090008320096815, "grad_norm": 0.13988302648067474, "learning_rate": 0.00017334000000000002, "loss": 0.0352, "step": 6670 }, { "epoch": 1.0105135768852582, "grad_norm": 0.09534142911434174, "learning_rate": 0.0001733, "loss": 0.0308, "step": 6680 }, { "epoch": 1.012026321760835, "grad_norm": 0.05622931197285652, "learning_rate": 0.00017326, "loss": 0.0311, "step": 6690 }, { "epoch": 1.0135390666364117, "grad_norm": 0.06480368971824646, "learning_rate": 0.00017322, "loss": 0.033, "step": 6700 }, { "epoch": 1.0150518115119884, "grad_norm": 0.08531224727630615, "learning_rate": 0.00017318000000000002, "loss": 0.0345, "step": 6710 }, { "epoch": 1.0165645563875652, "grad_norm": 0.11494185030460358, "learning_rate": 0.00017314, "loss": 0.0292, "step": 6720 }, { "epoch": 1.0180773012631419, "grad_norm": 0.06993953883647919, "learning_rate": 0.0001731, "loss": 0.0343, "step": 6730 }, { "epoch": 1.0195900461387186, "grad_norm": 0.09449311345815659, "learning_rate": 0.00017306, "loss": 0.0285, "step": 6740 }, { "epoch": 1.0211027910142954, "grad_norm": 0.10550418496131897, "learning_rate": 0.00017302, "loss": 0.0337, "step": 6750 }, { "epoch": 1.022615535889872, "grad_norm": 0.06987041234970093, "learning_rate": 0.00017298000000000001, "loss": 0.0273, "step": 6760 }, { "epoch": 1.0241282807654488, "grad_norm": 0.08014168590307236, "learning_rate": 0.00017294, "loss": 0.0318, "step": 6770 }, { "epoch": 1.0256410256410255, "grad_norm": 0.04886119067668915, "learning_rate": 0.00017290000000000003, "loss": 0.0318, "step": 6780 }, { "epoch": 1.0271537705166023, "grad_norm": 0.07735268771648407, "learning_rate": 0.00017286, "loss": 0.0377, "step": 6790 }, { "epoch": 1.028666515392179, "grad_norm": 0.07365155220031738, "learning_rate": 0.00017282000000000002, "loss": 0.0397, "step": 6800 }, { "epoch": 1.028666515392179, "eval_cer": 0.5956908628651482, "eval_loss": 0.03884879872202873, "eval_runtime": 10443.3198, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.252, "step": 6800 }, { "epoch": 1.0301792602677557, "grad_norm": 0.08235965669155121, "learning_rate": 0.00017278, "loss": 0.0356, "step": 6810 }, { "epoch": 1.0316920051433325, "grad_norm": 0.1203494668006897, "learning_rate": 0.00017274, "loss": 0.0391, "step": 6820 }, { "epoch": 1.0332047500189092, "grad_norm": 0.059709157794713974, "learning_rate": 0.00017270000000000002, "loss": 0.036, "step": 6830 }, { "epoch": 1.034717494894486, "grad_norm": 0.08380923420190811, "learning_rate": 0.00017266, "loss": 0.0311, "step": 6840 }, { "epoch": 1.0362302397700627, "grad_norm": 0.0642111599445343, "learning_rate": 0.00017262, "loss": 0.0296, "step": 6850 }, { "epoch": 1.0377429846456394, "grad_norm": 0.07701337337493896, "learning_rate": 0.00017258, "loss": 0.0318, "step": 6860 }, { "epoch": 1.0392557295212161, "grad_norm": 0.09674856811761856, "learning_rate": 0.00017254000000000003, "loss": 0.0294, "step": 6870 }, { "epoch": 1.0407684743967929, "grad_norm": 0.08543815463781357, "learning_rate": 0.00017250000000000002, "loss": 0.0322, "step": 6880 }, { "epoch": 1.0422812192723696, "grad_norm": 0.08181754499673843, "learning_rate": 0.00017246, "loss": 0.031, "step": 6890 }, { "epoch": 1.0437939641479463, "grad_norm": 0.07326922565698624, "learning_rate": 0.00017242, "loss": 0.0298, "step": 6900 }, { "epoch": 1.045306709023523, "grad_norm": 0.060128018260002136, "learning_rate": 0.00017238, "loss": 0.0351, "step": 6910 }, { "epoch": 1.0468194538990998, "grad_norm": 0.055250383913517, "learning_rate": 0.00017234000000000002, "loss": 0.0322, "step": 6920 }, { "epoch": 1.0483321987746765, "grad_norm": 0.07841707766056061, "learning_rate": 0.00017230000000000001, "loss": 0.0311, "step": 6930 }, { "epoch": 1.0498449436502533, "grad_norm": 0.06094701215624809, "learning_rate": 0.00017226, "loss": 0.0331, "step": 6940 }, { "epoch": 1.0513576885258302, "grad_norm": 0.0738435760140419, "learning_rate": 0.00017222, "loss": 0.0385, "step": 6950 }, { "epoch": 1.052870433401407, "grad_norm": 0.0741799846291542, "learning_rate": 0.00017218, "loss": 0.0332, "step": 6960 }, { "epoch": 1.0543831782769837, "grad_norm": 0.11769600957632065, "learning_rate": 0.00017214000000000002, "loss": 0.0288, "step": 6970 }, { "epoch": 1.0558959231525604, "grad_norm": 0.05547551065683365, "learning_rate": 0.0001721, "loss": 0.0351, "step": 6980 }, { "epoch": 1.0574086680281372, "grad_norm": 0.059602439403533936, "learning_rate": 0.00017206, "loss": 0.0315, "step": 6990 }, { "epoch": 1.0589214129037139, "grad_norm": 0.07523063570261002, "learning_rate": 0.00017202, "loss": 0.0344, "step": 7000 }, { "epoch": 1.0589214129037139, "eval_cer": 0.06192848124566072, "eval_loss": 0.03872867301106453, "eval_runtime": 10423.0915, "eval_samples_per_second": 2.02, "eval_steps_per_second": 0.253, "step": 7000 }, { "epoch": 1.0604341577792906, "grad_norm": 0.07334991544485092, "learning_rate": 0.00017198000000000002, "loss": 0.0394, "step": 7010 }, { "epoch": 1.0619469026548674, "grad_norm": 0.08875437080860138, "learning_rate": 0.00017194, "loss": 0.0316, "step": 7020 }, { "epoch": 1.063459647530444, "grad_norm": 0.06492207199335098, "learning_rate": 0.0001719, "loss": 0.0375, "step": 7030 }, { "epoch": 1.0649723924060208, "grad_norm": 0.08707519620656967, "learning_rate": 0.00017186, "loss": 0.0333, "step": 7040 }, { "epoch": 1.0664851372815976, "grad_norm": 0.06477733701467514, "learning_rate": 0.00017182, "loss": 0.036, "step": 7050 }, { "epoch": 1.0679978821571743, "grad_norm": 0.05914880335330963, "learning_rate": 0.00017178, "loss": 0.0307, "step": 7060 }, { "epoch": 1.069510627032751, "grad_norm": 0.11167873442173004, "learning_rate": 0.00017174, "loss": 0.0355, "step": 7070 }, { "epoch": 1.0710233719083277, "grad_norm": 0.08664342761039734, "learning_rate": 0.00017170000000000003, "loss": 0.0373, "step": 7080 }, { "epoch": 1.0725361167839045, "grad_norm": 0.06912154704332352, "learning_rate": 0.00017166, "loss": 0.0283, "step": 7090 }, { "epoch": 1.0740488616594812, "grad_norm": 0.09120757132768631, "learning_rate": 0.00017162000000000001, "loss": 0.0313, "step": 7100 }, { "epoch": 1.075561606535058, "grad_norm": 0.08159112185239792, "learning_rate": 0.00017158, "loss": 0.0413, "step": 7110 }, { "epoch": 1.0770743514106347, "grad_norm": 0.095944344997406, "learning_rate": 0.00017154, "loss": 0.0355, "step": 7120 }, { "epoch": 1.0785870962862114, "grad_norm": 0.10682930797338486, "learning_rate": 0.00017150000000000002, "loss": 0.0278, "step": 7130 }, { "epoch": 1.0800998411617881, "grad_norm": 0.06514004617929459, "learning_rate": 0.00017146, "loss": 0.0306, "step": 7140 }, { "epoch": 1.0816125860373649, "grad_norm": 0.07849156856536865, "learning_rate": 0.00017142, "loss": 0.0379, "step": 7150 }, { "epoch": 1.0831253309129416, "grad_norm": 0.0788741260766983, "learning_rate": 0.00017138, "loss": 0.032, "step": 7160 }, { "epoch": 1.0846380757885183, "grad_norm": 0.10495191067457199, "learning_rate": 0.00017134000000000002, "loss": 0.0358, "step": 7170 }, { "epoch": 1.086150820664095, "grad_norm": 0.07463409751653671, "learning_rate": 0.00017130000000000002, "loss": 0.0356, "step": 7180 }, { "epoch": 1.0876635655396718, "grad_norm": 0.08425049483776093, "learning_rate": 0.00017126, "loss": 0.0327, "step": 7190 }, { "epoch": 1.0891763104152485, "grad_norm": 0.07767146825790405, "learning_rate": 0.00017122, "loss": 0.034, "step": 7200 }, { "epoch": 1.0891763104152485, "eval_cer": 0.09758161553419167, "eval_loss": 0.037929706275463104, "eval_runtime": 10420.1284, "eval_samples_per_second": 2.02, "eval_steps_per_second": 0.253, "step": 7200 }, { "epoch": 1.0906890552908253, "grad_norm": 0.07770776748657227, "learning_rate": 0.00017118, "loss": 0.0321, "step": 7210 }, { "epoch": 1.092201800166402, "grad_norm": 0.06977003812789917, "learning_rate": 0.00017114000000000002, "loss": 0.0315, "step": 7220 }, { "epoch": 1.0937145450419787, "grad_norm": 0.077842116355896, "learning_rate": 0.0001711, "loss": 0.0317, "step": 7230 }, { "epoch": 1.0952272899175555, "grad_norm": 0.11414997279644012, "learning_rate": 0.00017106, "loss": 0.0392, "step": 7240 }, { "epoch": 1.0967400347931322, "grad_norm": 0.07568582892417908, "learning_rate": 0.00017102, "loss": 0.0369, "step": 7250 }, { "epoch": 1.098252779668709, "grad_norm": 0.07864728569984436, "learning_rate": 0.00017098000000000002, "loss": 0.038, "step": 7260 }, { "epoch": 1.0997655245442857, "grad_norm": 0.0852401927113533, "learning_rate": 0.00017094, "loss": 0.0323, "step": 7270 }, { "epoch": 1.1012782694198624, "grad_norm": 0.06548303365707397, "learning_rate": 0.0001709, "loss": 0.0373, "step": 7280 }, { "epoch": 1.1027910142954391, "grad_norm": 0.10153812170028687, "learning_rate": 0.00017086, "loss": 0.0321, "step": 7290 }, { "epoch": 1.1043037591710159, "grad_norm": 0.09032442420721054, "learning_rate": 0.00017082, "loss": 0.0306, "step": 7300 }, { "epoch": 1.1058165040465926, "grad_norm": 0.12109789252281189, "learning_rate": 0.00017078000000000001, "loss": 0.0355, "step": 7310 }, { "epoch": 1.1073292489221693, "grad_norm": 0.08515240997076035, "learning_rate": 0.00017074, "loss": 0.0374, "step": 7320 }, { "epoch": 1.108841993797746, "grad_norm": 0.06838446855545044, "learning_rate": 0.0001707, "loss": 0.0309, "step": 7330 }, { "epoch": 1.1103547386733228, "grad_norm": 0.10029911994934082, "learning_rate": 0.00017066, "loss": 0.0377, "step": 7340 }, { "epoch": 1.1118674835488995, "grad_norm": 0.08499938994646072, "learning_rate": 0.00017062, "loss": 0.0317, "step": 7350 }, { "epoch": 1.1133802284244763, "grad_norm": 0.10972133278846741, "learning_rate": 0.00017058, "loss": 0.0344, "step": 7360 }, { "epoch": 1.114892973300053, "grad_norm": 0.06848263740539551, "learning_rate": 0.00017054, "loss": 0.0356, "step": 7370 }, { "epoch": 1.1164057181756297, "grad_norm": 0.06813491135835648, "learning_rate": 0.00017050000000000002, "loss": 0.0291, "step": 7380 }, { "epoch": 1.1179184630512065, "grad_norm": 0.053215883672237396, "learning_rate": 0.00017046, "loss": 0.0297, "step": 7390 }, { "epoch": 1.1194312079267832, "grad_norm": 0.08575928211212158, "learning_rate": 0.00017042, "loss": 0.0378, "step": 7400 }, { "epoch": 1.1194312079267832, "eval_cer": 0.05163898174846133, "eval_loss": 0.03768303617835045, "eval_runtime": 10418.7834, "eval_samples_per_second": 2.021, "eval_steps_per_second": 0.253, "step": 7400 }, { "epoch": 1.12094395280236, "grad_norm": 0.07621601223945618, "learning_rate": 0.00017038, "loss": 0.032, "step": 7410 }, { "epoch": 1.1224566976779367, "grad_norm": 0.11499703675508499, "learning_rate": 0.00017034, "loss": 0.0331, "step": 7420 }, { "epoch": 1.1239694425535134, "grad_norm": 0.08789568394422531, "learning_rate": 0.00017030000000000002, "loss": 0.0332, "step": 7430 }, { "epoch": 1.1254821874290901, "grad_norm": 0.0887342318892479, "learning_rate": 0.00017025999999999999, "loss": 0.0374, "step": 7440 }, { "epoch": 1.1269949323046669, "grad_norm": 0.11794856935739517, "learning_rate": 0.00017022, "loss": 0.0347, "step": 7450 }, { "epoch": 1.1285076771802436, "grad_norm": 0.07593784481287003, "learning_rate": 0.00017018, "loss": 0.0323, "step": 7460 }, { "epoch": 1.1300204220558203, "grad_norm": 0.06868909299373627, "learning_rate": 0.00017014000000000002, "loss": 0.0311, "step": 7470 }, { "epoch": 1.131533166931397, "grad_norm": 0.1010032370686531, "learning_rate": 0.00017010000000000001, "loss": 0.0333, "step": 7480 }, { "epoch": 1.1330459118069738, "grad_norm": 0.08664656430482864, "learning_rate": 0.00017006, "loss": 0.0358, "step": 7490 }, { "epoch": 1.1345586566825505, "grad_norm": 0.09153386205434799, "learning_rate": 0.00017002, "loss": 0.0288, "step": 7500 }, { "epoch": 1.1360714015581272, "grad_norm": 0.10042116045951843, "learning_rate": 0.00016998, "loss": 0.0324, "step": 7510 }, { "epoch": 1.137584146433704, "grad_norm": 0.09703629463911057, "learning_rate": 0.00016994000000000002, "loss": 0.0356, "step": 7520 }, { "epoch": 1.1390968913092807, "grad_norm": 0.07961410284042358, "learning_rate": 0.0001699, "loss": 0.0279, "step": 7530 }, { "epoch": 1.1406096361848574, "grad_norm": 0.09164062142372131, "learning_rate": 0.00016986000000000003, "loss": 0.033, "step": 7540 }, { "epoch": 1.1421223810604342, "grad_norm": 0.0804910659790039, "learning_rate": 0.00016982, "loss": 0.033, "step": 7550 }, { "epoch": 1.143635125936011, "grad_norm": 0.07923970371484756, "learning_rate": 0.00016978000000000002, "loss": 0.0366, "step": 7560 }, { "epoch": 1.1451478708115876, "grad_norm": 0.1198810487985611, "learning_rate": 0.00016974, "loss": 0.0361, "step": 7570 }, { "epoch": 1.1466606156871644, "grad_norm": 0.08409520238637924, "learning_rate": 0.0001697, "loss": 0.0323, "step": 7580 }, { "epoch": 1.148173360562741, "grad_norm": 0.09524326026439667, "learning_rate": 0.00016966000000000003, "loss": 0.0338, "step": 7590 }, { "epoch": 1.1496861054383178, "grad_norm": 0.0670013502240181, "learning_rate": 0.00016962, "loss": 0.033, "step": 7600 }, { "epoch": 1.1496861054383178, "eval_cer": 0.04317970118571997, "eval_loss": 0.03775278851389885, "eval_runtime": 10413.2831, "eval_samples_per_second": 2.022, "eval_steps_per_second": 0.253, "step": 7600 }, { "epoch": 1.1511988503138946, "grad_norm": 0.07331959903240204, "learning_rate": 0.00016958, "loss": 0.0331, "step": 7610 }, { "epoch": 1.1527115951894713, "grad_norm": 0.06851343810558319, "learning_rate": 0.00016954, "loss": 0.0306, "step": 7620 }, { "epoch": 1.154224340065048, "grad_norm": 0.07627418637275696, "learning_rate": 0.00016950000000000003, "loss": 0.0334, "step": 7630 }, { "epoch": 1.1557370849406248, "grad_norm": 0.08676694333553314, "learning_rate": 0.00016946000000000002, "loss": 0.0322, "step": 7640 }, { "epoch": 1.1572498298162015, "grad_norm": 0.07023747265338898, "learning_rate": 0.00016942000000000001, "loss": 0.0358, "step": 7650 }, { "epoch": 1.1587625746917782, "grad_norm": 0.07805462926626205, "learning_rate": 0.00016938, "loss": 0.0325, "step": 7660 }, { "epoch": 1.160275319567355, "grad_norm": 0.0867529958486557, "learning_rate": 0.00016934, "loss": 0.0318, "step": 7670 }, { "epoch": 1.1617880644429317, "grad_norm": 0.08449842035770416, "learning_rate": 0.00016930000000000002, "loss": 0.0408, "step": 7680 }, { "epoch": 1.1633008093185084, "grad_norm": 0.08054087311029434, "learning_rate": 0.00016926000000000002, "loss": 0.0306, "step": 7690 }, { "epoch": 1.1648135541940852, "grad_norm": 0.08645962178707123, "learning_rate": 0.00016922, "loss": 0.0299, "step": 7700 }, { "epoch": 1.166326299069662, "grad_norm": 0.0892554521560669, "learning_rate": 0.00016918, "loss": 0.0352, "step": 7710 }, { "epoch": 1.1678390439452386, "grad_norm": 0.06643500924110413, "learning_rate": 0.00016914, "loss": 0.0284, "step": 7720 }, { "epoch": 1.1693517888208154, "grad_norm": 0.06918591260910034, "learning_rate": 0.00016910000000000002, "loss": 0.0278, "step": 7730 }, { "epoch": 1.170864533696392, "grad_norm": 0.08370740711688995, "learning_rate": 0.00016906, "loss": 0.0316, "step": 7740 }, { "epoch": 1.1723772785719688, "grad_norm": 0.053777385503053665, "learning_rate": 0.00016902, "loss": 0.036, "step": 7750 }, { "epoch": 1.1738900234475456, "grad_norm": 0.0665329247713089, "learning_rate": 0.00016898, "loss": 0.0333, "step": 7760 }, { "epoch": 1.1754027683231223, "grad_norm": 0.07484222948551178, "learning_rate": 0.00016894000000000002, "loss": 0.0319, "step": 7770 }, { "epoch": 1.176915513198699, "grad_norm": 0.08218715339899063, "learning_rate": 0.0001689, "loss": 0.0308, "step": 7780 }, { "epoch": 1.1784282580742758, "grad_norm": 0.06873024255037308, "learning_rate": 0.00016886, "loss": 0.0349, "step": 7790 }, { "epoch": 1.1799410029498525, "grad_norm": 0.07846609503030777, "learning_rate": 0.00016882, "loss": 0.0359, "step": 7800 }, { "epoch": 1.1799410029498525, "eval_cer": 0.1078840865459451, "eval_loss": 0.03878456726670265, "eval_runtime": 10398.1972, "eval_samples_per_second": 2.025, "eval_steps_per_second": 0.253, "step": 7800 }, { "epoch": 1.1814537478254292, "grad_norm": 0.06112883612513542, "learning_rate": 0.00016878, "loss": 0.0324, "step": 7810 }, { "epoch": 1.182966492701006, "grad_norm": 0.07065495103597641, "learning_rate": 0.00016874000000000001, "loss": 0.0333, "step": 7820 }, { "epoch": 1.1844792375765827, "grad_norm": 0.10944267362356186, "learning_rate": 0.0001687, "loss": 0.0322, "step": 7830 }, { "epoch": 1.1859919824521594, "grad_norm": 0.08741329610347748, "learning_rate": 0.00016866000000000003, "loss": 0.0339, "step": 7840 }, { "epoch": 1.1875047273277362, "grad_norm": 0.06457091867923737, "learning_rate": 0.00016862, "loss": 0.0345, "step": 7850 }, { "epoch": 1.1890174722033129, "grad_norm": 0.0570165179669857, "learning_rate": 0.00016858000000000002, "loss": 0.032, "step": 7860 }, { "epoch": 1.1905302170788896, "grad_norm": 0.07944530248641968, "learning_rate": 0.00016854, "loss": 0.0347, "step": 7870 }, { "epoch": 1.1920429619544664, "grad_norm": 0.06981216371059418, "learning_rate": 0.0001685, "loss": 0.0329, "step": 7880 }, { "epoch": 1.193555706830043, "grad_norm": 0.052252449095249176, "learning_rate": 0.00016846000000000002, "loss": 0.0327, "step": 7890 }, { "epoch": 1.1950684517056198, "grad_norm": 0.05333190783858299, "learning_rate": 0.00016842, "loss": 0.0269, "step": 7900 }, { "epoch": 1.1965811965811965, "grad_norm": 0.18012838065624237, "learning_rate": 0.00016838, "loss": 0.0324, "step": 7910 }, { "epoch": 1.1980939414567733, "grad_norm": 0.06892676651477814, "learning_rate": 0.00016834, "loss": 0.0294, "step": 7920 }, { "epoch": 1.19960668633235, "grad_norm": 0.07558593899011612, "learning_rate": 0.00016830000000000003, "loss": 0.0371, "step": 7930 }, { "epoch": 1.2011194312079267, "grad_norm": 0.08046507835388184, "learning_rate": 0.00016826000000000002, "loss": 0.0311, "step": 7940 }, { "epoch": 1.2026321760835035, "grad_norm": 0.07986424118280411, "learning_rate": 0.00016822, "loss": 0.0357, "step": 7950 }, { "epoch": 1.2041449209590802, "grad_norm": 0.07394195348024368, "learning_rate": 0.00016818, "loss": 0.0341, "step": 7960 }, { "epoch": 1.205657665834657, "grad_norm": 0.06269822269678116, "learning_rate": 0.00016814, "loss": 0.0329, "step": 7970 }, { "epoch": 1.2071704107102337, "grad_norm": 0.07179784774780273, "learning_rate": 0.00016810000000000002, "loss": 0.0329, "step": 7980 }, { "epoch": 1.2086831555858104, "grad_norm": 0.10174887627363205, "learning_rate": 0.00016806000000000001, "loss": 0.0262, "step": 7990 }, { "epoch": 1.2101959004613871, "grad_norm": 0.06536643952131271, "learning_rate": 0.00016802, "loss": 0.034, "step": 8000 }, { "epoch": 1.2101959004613871, "eval_cer": 0.15941559003095868, "eval_loss": 0.03837862238287926, "eval_runtime": 10390.1541, "eval_samples_per_second": 2.026, "eval_steps_per_second": 0.253, "step": 8000 }, { "epoch": 1.2117086453369639, "grad_norm": 0.13079065084457397, "learning_rate": 0.00016798, "loss": 0.037, "step": 8010 }, { "epoch": 1.2132213902125406, "grad_norm": 0.07293607294559479, "learning_rate": 0.00016794000000000002, "loss": 0.0295, "step": 8020 }, { "epoch": 1.2147341350881173, "grad_norm": 0.07390507310628891, "learning_rate": 0.00016790000000000002, "loss": 0.0309, "step": 8030 }, { "epoch": 1.216246879963694, "grad_norm": 0.22675780951976776, "learning_rate": 0.00016786, "loss": 0.0341, "step": 8040 }, { "epoch": 1.2177596248392708, "grad_norm": 0.06630139797925949, "learning_rate": 0.00016782, "loss": 0.0359, "step": 8050 }, { "epoch": 1.2192723697148475, "grad_norm": 0.09231210500001907, "learning_rate": 0.00016778, "loss": 0.0325, "step": 8060 }, { "epoch": 1.2207851145904243, "grad_norm": 0.067893847823143, "learning_rate": 0.00016774000000000002, "loss": 0.0338, "step": 8070 }, { "epoch": 1.222297859466001, "grad_norm": 0.16284491121768951, "learning_rate": 0.0001677, "loss": 0.0362, "step": 8080 }, { "epoch": 1.2238106043415777, "grad_norm": 0.07695828378200531, "learning_rate": 0.00016766, "loss": 0.0367, "step": 8090 }, { "epoch": 1.2253233492171545, "grad_norm": 0.07685229927301407, "learning_rate": 0.00016762, "loss": 0.0383, "step": 8100 }, { "epoch": 1.2268360940927312, "grad_norm": 0.08510534465312958, "learning_rate": 0.00016758, "loss": 0.0346, "step": 8110 }, { "epoch": 1.228348838968308, "grad_norm": 0.16018977761268616, "learning_rate": 0.00016754, "loss": 0.0314, "step": 8120 }, { "epoch": 1.2298615838438847, "grad_norm": 0.10644716769456863, "learning_rate": 0.0001675, "loss": 0.0427, "step": 8130 }, { "epoch": 1.2313743287194614, "grad_norm": 0.06390608847141266, "learning_rate": 0.00016746000000000003, "loss": 0.0333, "step": 8140 }, { "epoch": 1.2328870735950381, "grad_norm": 0.1173742264509201, "learning_rate": 0.00016742, "loss": 0.0335, "step": 8150 }, { "epoch": 1.2343998184706149, "grad_norm": 0.08506636321544647, "learning_rate": 0.00016738000000000001, "loss": 0.0393, "step": 8160 }, { "epoch": 1.2359125633461916, "grad_norm": 0.08176897466182709, "learning_rate": 0.00016734, "loss": 0.0306, "step": 8170 }, { "epoch": 1.2374253082217683, "grad_norm": 0.11272590607404709, "learning_rate": 0.0001673, "loss": 0.0368, "step": 8180 }, { "epoch": 1.238938053097345, "grad_norm": 0.10923430323600769, "learning_rate": 0.00016726000000000002, "loss": 0.0389, "step": 8190 }, { "epoch": 1.2404507979729218, "grad_norm": 0.05665091425180435, "learning_rate": 0.00016722, "loss": 0.0352, "step": 8200 }, { "epoch": 1.2404507979729218, "eval_cer": 0.195939668868118, "eval_loss": 0.03837649151682854, "eval_runtime": 10379.5895, "eval_samples_per_second": 2.028, "eval_steps_per_second": 0.254, "step": 8200 }, { "epoch": 1.2419635428484985, "grad_norm": 0.08927123993635178, "learning_rate": 0.00016718, "loss": 0.0356, "step": 8210 }, { "epoch": 1.2434762877240753, "grad_norm": 0.09398534893989563, "learning_rate": 0.00016714, "loss": 0.0365, "step": 8220 }, { "epoch": 1.244989032599652, "grad_norm": 0.0905461311340332, "learning_rate": 0.00016710000000000002, "loss": 0.0335, "step": 8230 }, { "epoch": 1.2465017774752287, "grad_norm": 0.09033455699682236, "learning_rate": 0.00016706000000000002, "loss": 0.0376, "step": 8240 }, { "epoch": 1.2480145223508055, "grad_norm": 0.08217161148786545, "learning_rate": 0.00016702, "loss": 0.032, "step": 8250 }, { "epoch": 1.2495272672263822, "grad_norm": 0.0694824755191803, "learning_rate": 0.00016698, "loss": 0.0354, "step": 8260 }, { "epoch": 1.2510400121019591, "grad_norm": 0.08535374701023102, "learning_rate": 0.00016694, "loss": 0.0288, "step": 8270 }, { "epoch": 1.2525527569775359, "grad_norm": 0.10267391055822372, "learning_rate": 0.00016690000000000002, "loss": 0.0331, "step": 8280 }, { "epoch": 1.2540655018531126, "grad_norm": 0.0720328763127327, "learning_rate": 0.00016686, "loss": 0.0324, "step": 8290 }, { "epoch": 1.2555782467286893, "grad_norm": 0.15617039799690247, "learning_rate": 0.00016682, "loss": 0.0374, "step": 8300 }, { "epoch": 1.257090991604266, "grad_norm": 0.09863468259572983, "learning_rate": 0.00016678, "loss": 0.0363, "step": 8310 }, { "epoch": 1.2586037364798428, "grad_norm": 0.08562877029180527, "learning_rate": 0.00016674000000000002, "loss": 0.0347, "step": 8320 }, { "epoch": 1.2601164813554195, "grad_norm": 0.09868349879980087, "learning_rate": 0.0001667, "loss": 0.0362, "step": 8330 }, { "epoch": 1.2616292262309963, "grad_norm": 0.09744835644960403, "learning_rate": 0.00016666, "loss": 0.0364, "step": 8340 }, { "epoch": 1.263141971106573, "grad_norm": 0.19243358075618744, "learning_rate": 0.00016662, "loss": 0.0378, "step": 8350 }, { "epoch": 1.2646547159821497, "grad_norm": 0.06478457897901535, "learning_rate": 0.00016658, "loss": 0.033, "step": 8360 }, { "epoch": 1.2661674608577265, "grad_norm": 0.09313791990280151, "learning_rate": 0.00016654000000000001, "loss": 0.04, "step": 8370 }, { "epoch": 1.2676802057333032, "grad_norm": 0.0906825065612793, "learning_rate": 0.0001665, "loss": 0.0341, "step": 8380 }, { "epoch": 1.26919295060888, "grad_norm": 0.08549359440803528, "learning_rate": 0.00016646000000000003, "loss": 0.0376, "step": 8390 }, { "epoch": 1.2707056954844567, "grad_norm": 0.0915452241897583, "learning_rate": 0.00016642, "loss": 0.029, "step": 8400 }, { "epoch": 1.2707056954844567, "eval_cer": 0.19141261028875828, "eval_loss": 0.03777679055929184, "eval_runtime": 10360.722, "eval_samples_per_second": 2.032, "eval_steps_per_second": 0.254, "step": 8400 }, { "epoch": 1.2722184403600334, "grad_norm": 0.07039971649646759, "learning_rate": 0.00016638, "loss": 0.0355, "step": 8410 }, { "epoch": 1.2737311852356101, "grad_norm": 0.08890164643526077, "learning_rate": 0.00016634, "loss": 0.03, "step": 8420 }, { "epoch": 1.2752439301111869, "grad_norm": 0.07611805945634842, "learning_rate": 0.0001663, "loss": 0.037, "step": 8430 }, { "epoch": 1.2767566749867636, "grad_norm": 0.10268427431583405, "learning_rate": 0.00016626000000000002, "loss": 0.0346, "step": 8440 }, { "epoch": 1.2782694198623403, "grad_norm": 0.07185817509889603, "learning_rate": 0.00016622, "loss": 0.0334, "step": 8450 }, { "epoch": 1.279782164737917, "grad_norm": 0.09720634669065475, "learning_rate": 0.00016618, "loss": 0.0328, "step": 8460 }, { "epoch": 1.2812949096134938, "grad_norm": 0.08373324573040009, "learning_rate": 0.00016614, "loss": 0.0342, "step": 8470 }, { "epoch": 1.2828076544890705, "grad_norm": 0.05525701493024826, "learning_rate": 0.0001661, "loss": 0.0295, "step": 8480 }, { "epoch": 1.2843203993646473, "grad_norm": 0.08398504555225372, "learning_rate": 0.00016606000000000002, "loss": 0.0336, "step": 8490 }, { "epoch": 1.285833144240224, "grad_norm": 0.11384329944849014, "learning_rate": 0.00016601999999999999, "loss": 0.0335, "step": 8500 }, { "epoch": 1.2873458891158007, "grad_norm": 0.05366117134690285, "learning_rate": 0.00016598, "loss": 0.0303, "step": 8510 }, { "epoch": 1.2888586339913775, "grad_norm": 0.09270923584699631, "learning_rate": 0.00016594, "loss": 0.0309, "step": 8520 }, { "epoch": 1.2903713788669542, "grad_norm": 0.09621911495923996, "learning_rate": 0.00016590000000000002, "loss": 0.0326, "step": 8530 }, { "epoch": 1.291884123742531, "grad_norm": 0.09750113636255264, "learning_rate": 0.00016586000000000001, "loss": 0.032, "step": 8540 }, { "epoch": 1.2933968686181077, "grad_norm": 0.08557499945163727, "learning_rate": 0.00016582, "loss": 0.0331, "step": 8550 }, { "epoch": 1.2949096134936844, "grad_norm": 0.0842200294137001, "learning_rate": 0.00016578, "loss": 0.0339, "step": 8560 }, { "epoch": 1.2964223583692611, "grad_norm": 0.06341574341058731, "learning_rate": 0.00016574, "loss": 0.0369, "step": 8570 }, { "epoch": 1.2979351032448379, "grad_norm": 0.07687686383724213, "learning_rate": 0.00016570000000000002, "loss": 0.0291, "step": 8580 }, { "epoch": 1.2994478481204146, "grad_norm": 0.07118263840675354, "learning_rate": 0.00016566, "loss": 0.0331, "step": 8590 }, { "epoch": 1.3009605929959913, "grad_norm": 0.10967772454023361, "learning_rate": 0.00016562, "loss": 0.04, "step": 8600 }, { "epoch": 1.3009605929959913, "eval_cer": 0.15955704130871465, "eval_loss": 0.03786647692322731, "eval_runtime": 10383.8112, "eval_samples_per_second": 2.027, "eval_steps_per_second": 0.253, "step": 8600 }, { "epoch": 1.302473337871568, "grad_norm": 0.09102348983287811, "learning_rate": 0.00016558, "loss": 0.0337, "step": 8610 }, { "epoch": 1.3039860827471448, "grad_norm": 0.0596625916659832, "learning_rate": 0.00016554000000000002, "loss": 0.0341, "step": 8620 }, { "epoch": 1.3054988276227215, "grad_norm": 0.0790410116314888, "learning_rate": 0.0001655, "loss": 0.0348, "step": 8630 }, { "epoch": 1.3070115724982982, "grad_norm": 0.08243832737207413, "learning_rate": 0.00016546, "loss": 0.0351, "step": 8640 }, { "epoch": 1.308524317373875, "grad_norm": 0.07890262454748154, "learning_rate": 0.00016542, "loss": 0.0331, "step": 8650 }, { "epoch": 1.3100370622494517, "grad_norm": 0.06424404680728912, "learning_rate": 0.00016538, "loss": 0.032, "step": 8660 }, { "epoch": 1.3115498071250284, "grad_norm": 0.08828658610582352, "learning_rate": 0.00016534, "loss": 0.0351, "step": 8670 }, { "epoch": 1.3130625520006052, "grad_norm": 0.07190482318401337, "learning_rate": 0.0001653, "loss": 0.0334, "step": 8680 }, { "epoch": 1.314575296876182, "grad_norm": 0.1207108125090599, "learning_rate": 0.00016526000000000003, "loss": 0.0333, "step": 8690 }, { "epoch": 1.3160880417517586, "grad_norm": 0.057197410613298416, "learning_rate": 0.00016522, "loss": 0.0273, "step": 8700 }, { "epoch": 1.3176007866273354, "grad_norm": 0.0845530703663826, "learning_rate": 0.00016518000000000001, "loss": 0.0398, "step": 8710 }, { "epoch": 1.319113531502912, "grad_norm": 0.07357069104909897, "learning_rate": 0.00016514, "loss": 0.0334, "step": 8720 }, { "epoch": 1.3206262763784888, "grad_norm": 0.07419273257255554, "learning_rate": 0.0001651, "loss": 0.0267, "step": 8730 }, { "epoch": 1.3221390212540656, "grad_norm": 0.08293847739696503, "learning_rate": 0.00016506000000000002, "loss": 0.0286, "step": 8740 }, { "epoch": 1.3236517661296423, "grad_norm": 0.09437254071235657, "learning_rate": 0.00016502, "loss": 0.0411, "step": 8750 }, { "epoch": 1.325164511005219, "grad_norm": 0.06988554447889328, "learning_rate": 0.00016498, "loss": 0.0288, "step": 8760 }, { "epoch": 1.3266772558807958, "grad_norm": 0.11081293970346451, "learning_rate": 0.00016494, "loss": 0.0342, "step": 8770 }, { "epoch": 1.3281900007563725, "grad_norm": 0.0911073237657547, "learning_rate": 0.0001649, "loss": 0.0324, "step": 8780 }, { "epoch": 1.3297027456319492, "grad_norm": 0.08337673544883728, "learning_rate": 0.00016486000000000002, "loss": 0.0297, "step": 8790 }, { "epoch": 1.331215490507526, "grad_norm": 0.09077824652194977, "learning_rate": 0.00016482, "loss": 0.0319, "step": 8800 }, { "epoch": 1.331215490507526, "eval_cer": 0.050760007214632856, "eval_loss": 0.03842457756400108, "eval_runtime": 10378.6583, "eval_samples_per_second": 2.028, "eval_steps_per_second": 0.254, "step": 8800 }, { "epoch": 1.3327282353831027, "grad_norm": 0.12336084991693497, "learning_rate": 0.00016478, "loss": 0.0371, "step": 8810 }, { "epoch": 1.3342409802586794, "grad_norm": 0.07978357374668121, "learning_rate": 0.00016474, "loss": 0.0349, "step": 8820 }, { "epoch": 1.3357537251342562, "grad_norm": 0.1073361411690712, "learning_rate": 0.00016470000000000002, "loss": 0.0417, "step": 8830 }, { "epoch": 1.337266470009833, "grad_norm": 0.05822708085179329, "learning_rate": 0.00016466, "loss": 0.0302, "step": 8840 }, { "epoch": 1.3387792148854096, "grad_norm": 0.06241593137383461, "learning_rate": 0.00016462, "loss": 0.0365, "step": 8850 }, { "epoch": 1.3402919597609864, "grad_norm": 0.10107123106718063, "learning_rate": 0.00016458, "loss": 0.0345, "step": 8860 }, { "epoch": 1.341804704636563, "grad_norm": 0.09659604728221893, "learning_rate": 0.00016454, "loss": 0.0324, "step": 8870 }, { "epoch": 1.3433174495121398, "grad_norm": 0.07501540333032608, "learning_rate": 0.00016450000000000001, "loss": 0.0317, "step": 8880 }, { "epoch": 1.3448301943877166, "grad_norm": 0.071120485663414, "learning_rate": 0.00016446, "loss": 0.0299, "step": 8890 }, { "epoch": 1.3463429392632933, "grad_norm": 0.07235920429229736, "learning_rate": 0.00016442000000000003, "loss": 0.0337, "step": 8900 }, { "epoch": 1.34785568413887, "grad_norm": 0.08588097244501114, "learning_rate": 0.00016438, "loss": 0.0302, "step": 8910 }, { "epoch": 1.3493684290144468, "grad_norm": 0.052244190126657486, "learning_rate": 0.00016434000000000002, "loss": 0.0326, "step": 8920 }, { "epoch": 1.3508811738900235, "grad_norm": 0.0702931210398674, "learning_rate": 0.0001643, "loss": 0.0372, "step": 8930 }, { "epoch": 1.3523939187656002, "grad_norm": 0.10441485792398453, "learning_rate": 0.00016426, "loss": 0.037, "step": 8940 }, { "epoch": 1.353906663641177, "grad_norm": 0.10514800250530243, "learning_rate": 0.00016422000000000002, "loss": 0.037, "step": 8950 }, { "epoch": 1.3554194085167537, "grad_norm": 0.07011867314577103, "learning_rate": 0.00016418, "loss": 0.0314, "step": 8960 }, { "epoch": 1.3569321533923304, "grad_norm": 0.06335943937301636, "learning_rate": 0.00016414, "loss": 0.0311, "step": 8970 }, { "epoch": 1.3584448982679072, "grad_norm": 0.07194424420595169, "learning_rate": 0.0001641, "loss": 0.0336, "step": 8980 }, { "epoch": 1.3599576431434839, "grad_norm": 0.07171431183815002, "learning_rate": 0.00016406000000000003, "loss": 0.0312, "step": 8990 }, { "epoch": 1.3614703880190606, "grad_norm": 0.14893119037151337, "learning_rate": 0.00016402000000000002, "loss": 0.0348, "step": 9000 }, { "epoch": 1.3614703880190606, "eval_cer": 0.23852391576669063, "eval_loss": 0.03737874701619148, "eval_runtime": 10378.6671, "eval_samples_per_second": 2.028, "eval_steps_per_second": 0.254, "step": 9000 }, { "epoch": 1.3629831328946374, "grad_norm": 0.09854207932949066, "learning_rate": 0.00016398, "loss": 0.0334, "step": 9010 }, { "epoch": 1.364495877770214, "grad_norm": 0.0829731673002243, "learning_rate": 0.00016394, "loss": 0.0367, "step": 9020 }, { "epoch": 1.3660086226457908, "grad_norm": 0.05378841981291771, "learning_rate": 0.0001639, "loss": 0.0328, "step": 9030 }, { "epoch": 1.3675213675213675, "grad_norm": 0.08590775728225708, "learning_rate": 0.00016386000000000002, "loss": 0.0337, "step": 9040 }, { "epoch": 1.3690341123969443, "grad_norm": 0.06473217159509659, "learning_rate": 0.00016382000000000001, "loss": 0.0309, "step": 9050 }, { "epoch": 1.370546857272521, "grad_norm": 0.14496292173862457, "learning_rate": 0.00016378, "loss": 0.0362, "step": 9060 }, { "epoch": 1.3720596021480977, "grad_norm": 0.0658840760588646, "learning_rate": 0.00016374, "loss": 0.0316, "step": 9070 }, { "epoch": 1.3735723470236745, "grad_norm": 0.0722692534327507, "learning_rate": 0.00016370000000000002, "loss": 0.0321, "step": 9080 }, { "epoch": 1.3750850918992512, "grad_norm": 0.0751873180270195, "learning_rate": 0.00016366000000000002, "loss": 0.0357, "step": 9090 }, { "epoch": 1.376597836774828, "grad_norm": 0.07309116423130035, "learning_rate": 0.00016362, "loss": 0.0329, "step": 9100 }, { "epoch": 1.3781105816504047, "grad_norm": 0.09205902367830276, "learning_rate": 0.00016358, "loss": 0.0311, "step": 9110 }, { "epoch": 1.3796233265259814, "grad_norm": 0.06787604093551636, "learning_rate": 0.00016354, "loss": 0.0308, "step": 9120 }, { "epoch": 1.3811360714015581, "grad_norm": 0.08365906029939651, "learning_rate": 0.00016350000000000002, "loss": 0.0344, "step": 9130 }, { "epoch": 1.3826488162771349, "grad_norm": 0.07461418211460114, "learning_rate": 0.00016346, "loss": 0.0286, "step": 9140 }, { "epoch": 1.3841615611527116, "grad_norm": 0.11862760782241821, "learning_rate": 0.00016342, "loss": 0.0361, "step": 9150 }, { "epoch": 1.3856743060282883, "grad_norm": 0.07170487195253372, "learning_rate": 0.00016338, "loss": 0.0335, "step": 9160 }, { "epoch": 1.387187050903865, "grad_norm": 0.05578533932566643, "learning_rate": 0.00016334, "loss": 0.0311, "step": 9170 }, { "epoch": 1.3886997957794418, "grad_norm": 0.08838359266519547, "learning_rate": 0.0001633, "loss": 0.0341, "step": 9180 }, { "epoch": 1.3902125406550185, "grad_norm": 0.09284081310033798, "learning_rate": 0.00016326, "loss": 0.0322, "step": 9190 }, { "epoch": 1.3917252855305953, "grad_norm": 0.07425800710916519, "learning_rate": 0.00016322000000000003, "loss": 0.0319, "step": 9200 }, { "epoch": 1.3917252855305953, "eval_cer": 0.11210909414354649, "eval_loss": 0.036687206476926804, "eval_runtime": 10439.2076, "eval_samples_per_second": 2.017, "eval_steps_per_second": 0.252, "step": 9200 }, { "epoch": 1.393238030406172, "grad_norm": 0.0754477009177208, "learning_rate": 0.00016318, "loss": 0.0355, "step": 9210 }, { "epoch": 1.3947507752817487, "grad_norm": 0.06408898532390594, "learning_rate": 0.00016314, "loss": 0.0345, "step": 9220 }, { "epoch": 1.3962635201573255, "grad_norm": 0.06003674492239952, "learning_rate": 0.0001631, "loss": 0.0316, "step": 9230 }, { "epoch": 1.3977762650329022, "grad_norm": 0.07409165799617767, "learning_rate": 0.00016306, "loss": 0.03, "step": 9240 }, { "epoch": 1.399289009908479, "grad_norm": 0.07411226630210876, "learning_rate": 0.00016302000000000002, "loss": 0.0325, "step": 9250 }, { "epoch": 1.4008017547840557, "grad_norm": 0.09041300415992737, "learning_rate": 0.00016298, "loss": 0.034, "step": 9260 }, { "epoch": 1.4023144996596324, "grad_norm": 0.0684356689453125, "learning_rate": 0.00016294, "loss": 0.0345, "step": 9270 }, { "epoch": 1.4038272445352091, "grad_norm": 0.08621818572282791, "learning_rate": 0.0001629, "loss": 0.0287, "step": 9280 }, { "epoch": 1.4053399894107859, "grad_norm": 0.09592179954051971, "learning_rate": 0.00016286000000000002, "loss": 0.0371, "step": 9290 }, { "epoch": 1.4068527342863626, "grad_norm": 0.061489395797252655, "learning_rate": 0.00016282000000000002, "loss": 0.0297, "step": 9300 }, { "epoch": 1.4083654791619393, "grad_norm": 0.08933687955141068, "learning_rate": 0.00016278, "loss": 0.0329, "step": 9310 }, { "epoch": 1.409878224037516, "grad_norm": 0.06542832404375076, "learning_rate": 0.00016274, "loss": 0.0359, "step": 9320 }, { "epoch": 1.4113909689130928, "grad_norm": 0.10515543818473816, "learning_rate": 0.0001627, "loss": 0.0282, "step": 9330 }, { "epoch": 1.4129037137886695, "grad_norm": 0.11535684019327164, "learning_rate": 0.00016266000000000002, "loss": 0.0346, "step": 9340 }, { "epoch": 1.4144164586642463, "grad_norm": 0.10359009355306625, "learning_rate": 0.00016262, "loss": 0.0326, "step": 9350 }, { "epoch": 1.415929203539823, "grad_norm": 0.08905740082263947, "learning_rate": 0.00016258, "loss": 0.0353, "step": 9360 }, { "epoch": 1.4174419484153997, "grad_norm": 0.0570446141064167, "learning_rate": 0.00016254, "loss": 0.0282, "step": 9370 }, { "epoch": 1.4189546932909765, "grad_norm": 0.0748140960931778, "learning_rate": 0.00016250000000000002, "loss": 0.0304, "step": 9380 }, { "epoch": 1.4204674381665532, "grad_norm": 0.07355400919914246, "learning_rate": 0.00016246, "loss": 0.031, "step": 9390 }, { "epoch": 1.42198018304213, "grad_norm": 0.09431416541337967, "learning_rate": 0.00016242, "loss": 0.0355, "step": 9400 }, { "epoch": 1.42198018304213, "eval_cer": 0.09460805024547048, "eval_loss": 0.03653513640165329, "eval_runtime": 10519.6629, "eval_samples_per_second": 2.001, "eval_steps_per_second": 0.25, "step": 9400 }, { "epoch": 1.4234929279177067, "grad_norm": 0.10641132295131683, "learning_rate": 0.00016238, "loss": 0.0299, "step": 9410 }, { "epoch": 1.4250056727932834, "grad_norm": 0.051270656287670135, "learning_rate": 0.00016234, "loss": 0.0317, "step": 9420 }, { "epoch": 1.4265184176688601, "grad_norm": 0.07362283766269684, "learning_rate": 0.00016230000000000001, "loss": 0.0269, "step": 9430 }, { "epoch": 1.4280311625444368, "grad_norm": 0.060159552842378616, "learning_rate": 0.00016226, "loss": 0.0335, "step": 9440 }, { "epoch": 1.4295439074200136, "grad_norm": 0.08667318522930145, "learning_rate": 0.00016222000000000003, "loss": 0.0361, "step": 9450 }, { "epoch": 1.4310566522955903, "grad_norm": 0.06154588237404823, "learning_rate": 0.00016218, "loss": 0.0334, "step": 9460 }, { "epoch": 1.432569397171167, "grad_norm": 0.10563425719738007, "learning_rate": 0.00016214000000000002, "loss": 0.0362, "step": 9470 }, { "epoch": 1.4340821420467438, "grad_norm": 0.10325556248426437, "learning_rate": 0.0001621, "loss": 0.0343, "step": 9480 }, { "epoch": 1.4355948869223205, "grad_norm": 0.08902329206466675, "learning_rate": 0.00016206, "loss": 0.032, "step": 9490 }, { "epoch": 1.4371076317978972, "grad_norm": 0.07280543446540833, "learning_rate": 0.00016202000000000002, "loss": 0.0366, "step": 9500 }, { "epoch": 1.438620376673474, "grad_norm": 0.09071139991283417, "learning_rate": 0.00016198, "loss": 0.0299, "step": 9510 }, { "epoch": 1.4401331215490507, "grad_norm": 0.06658421456813812, "learning_rate": 0.00016194, "loss": 0.0281, "step": 9520 }, { "epoch": 1.4416458664246274, "grad_norm": 0.0793207511305809, "learning_rate": 0.0001619, "loss": 0.0292, "step": 9530 }, { "epoch": 1.4431586113002042, "grad_norm": 0.0829392522573471, "learning_rate": 0.00016186, "loss": 0.0337, "step": 9540 }, { "epoch": 1.444671356175781, "grad_norm": 0.061817191541194916, "learning_rate": 0.00016182000000000002, "loss": 0.0298, "step": 9550 }, { "epoch": 1.4461841010513576, "grad_norm": 0.09837779402732849, "learning_rate": 0.00016177999999999999, "loss": 0.037, "step": 9560 }, { "epoch": 1.4476968459269344, "grad_norm": 0.05777046084403992, "learning_rate": 0.00016174, "loss": 0.0339, "step": 9570 }, { "epoch": 1.449209590802511, "grad_norm": 0.07731931656599045, "learning_rate": 0.0001617, "loss": 0.0338, "step": 9580 }, { "epoch": 1.4507223356780878, "grad_norm": 0.08898504078388214, "learning_rate": 0.00016166000000000002, "loss": 0.0358, "step": 9590 }, { "epoch": 1.4522350805536646, "grad_norm": 0.0696534812450409, "learning_rate": 0.00016162000000000001, "loss": 0.0318, "step": 9600 }, { "epoch": 1.4522350805536646, "eval_cer": 0.08453906649568975, "eval_loss": 0.036363635212183, "eval_runtime": 10514.0599, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.25, "step": 9600 }, { "epoch": 1.4537478254292413, "grad_norm": 0.059242941439151764, "learning_rate": 0.00016158, "loss": 0.0313, "step": 9610 }, { "epoch": 1.455260570304818, "grad_norm": 0.0844852551817894, "learning_rate": 0.00016154, "loss": 0.034, "step": 9620 }, { "epoch": 1.4567733151803948, "grad_norm": 0.08737514168024063, "learning_rate": 0.0001615, "loss": 0.0314, "step": 9630 }, { "epoch": 1.4582860600559715, "grad_norm": 0.08028477430343628, "learning_rate": 0.00016146000000000002, "loss": 0.028, "step": 9640 }, { "epoch": 1.4597988049315482, "grad_norm": 0.08293917775154114, "learning_rate": 0.00016142, "loss": 0.0344, "step": 9650 }, { "epoch": 1.461311549807125, "grad_norm": 0.07055462896823883, "learning_rate": 0.00016138, "loss": 0.0329, "step": 9660 }, { "epoch": 1.4628242946827017, "grad_norm": 0.08431320637464523, "learning_rate": 0.00016134, "loss": 0.0313, "step": 9670 }, { "epoch": 1.4643370395582784, "grad_norm": 0.09756868332624435, "learning_rate": 0.00016130000000000002, "loss": 0.0305, "step": 9680 }, { "epoch": 1.4658497844338552, "grad_norm": 0.07265082001686096, "learning_rate": 0.00016126, "loss": 0.0333, "step": 9690 }, { "epoch": 1.467362529309432, "grad_norm": 0.09156455099582672, "learning_rate": 0.00016122, "loss": 0.0356, "step": 9700 }, { "epoch": 1.4688752741850086, "grad_norm": 0.06957582384347916, "learning_rate": 0.00016118, "loss": 0.0313, "step": 9710 }, { "epoch": 1.4703880190605854, "grad_norm": 0.06783420592546463, "learning_rate": 0.00016114, "loss": 0.0297, "step": 9720 }, { "epoch": 1.471900763936162, "grad_norm": 0.07193417102098465, "learning_rate": 0.0001611, "loss": 0.0302, "step": 9730 }, { "epoch": 1.4734135088117388, "grad_norm": 0.08238872140645981, "learning_rate": 0.00016106, "loss": 0.0335, "step": 9740 }, { "epoch": 1.4749262536873156, "grad_norm": 0.07197025418281555, "learning_rate": 0.00016102000000000003, "loss": 0.0369, "step": 9750 }, { "epoch": 1.4764389985628923, "grad_norm": 0.08109525591135025, "learning_rate": 0.00016098, "loss": 0.0327, "step": 9760 }, { "epoch": 1.477951743438469, "grad_norm": 0.12331151217222214, "learning_rate": 0.00016094000000000001, "loss": 0.0372, "step": 9770 }, { "epoch": 1.4794644883140458, "grad_norm": 0.08190298080444336, "learning_rate": 0.0001609, "loss": 0.0293, "step": 9780 }, { "epoch": 1.4809772331896225, "grad_norm": 0.05840008333325386, "learning_rate": 0.00016086, "loss": 0.0349, "step": 9790 }, { "epoch": 1.4824899780651992, "grad_norm": 0.07874023169279099, "learning_rate": 0.00016082000000000002, "loss": 0.0322, "step": 9800 }, { "epoch": 1.4824899780651992, "eval_cer": 0.24973192203254985, "eval_loss": 0.036100711673498154, "eval_runtime": 10381.657, "eval_samples_per_second": 2.028, "eval_steps_per_second": 0.254, "step": 9800 }, { "epoch": 1.484002722940776, "grad_norm": 0.0776941329240799, "learning_rate": 0.00016078, "loss": 0.0358, "step": 9810 }, { "epoch": 1.4855154678163527, "grad_norm": 0.12248267233371735, "learning_rate": 0.00016074, "loss": 0.0356, "step": 9820 }, { "epoch": 1.4870282126919294, "grad_norm": 0.08847146481275558, "learning_rate": 0.0001607, "loss": 0.0274, "step": 9830 }, { "epoch": 1.4885409575675062, "grad_norm": 0.0689850002527237, "learning_rate": 0.00016066000000000002, "loss": 0.0266, "step": 9840 }, { "epoch": 1.4900537024430829, "grad_norm": 0.06342552602291107, "learning_rate": 0.00016062000000000002, "loss": 0.031, "step": 9850 }, { "epoch": 1.4915664473186596, "grad_norm": 0.11846140772104263, "learning_rate": 0.00016057999999999998, "loss": 0.0348, "step": 9860 }, { "epoch": 1.4930791921942363, "grad_norm": 0.07698410004377365, "learning_rate": 0.00016054, "loss": 0.0259, "step": 9870 }, { "epoch": 1.494591937069813, "grad_norm": 0.11177106946706772, "learning_rate": 0.0001605, "loss": 0.0301, "step": 9880 }, { "epoch": 1.4961046819453898, "grad_norm": 0.09459209442138672, "learning_rate": 0.00016046000000000002, "loss": 0.0349, "step": 9890 }, { "epoch": 1.4976174268209665, "grad_norm": 0.08800119906663895, "learning_rate": 0.00016042, "loss": 0.0335, "step": 9900 }, { "epoch": 1.4991301716965433, "grad_norm": 0.09330447763204575, "learning_rate": 0.00016038, "loss": 0.0326, "step": 9910 }, { "epoch": 1.50064291657212, "grad_norm": 0.10210063308477402, "learning_rate": 0.00016034, "loss": 0.035, "step": 9920 }, { "epoch": 1.5021556614476967, "grad_norm": 0.11886809766292572, "learning_rate": 0.0001603, "loss": 0.036, "step": 9930 }, { "epoch": 1.5036684063232735, "grad_norm": 0.07646410167217255, "learning_rate": 0.00016026000000000001, "loss": 0.0269, "step": 9940 }, { "epoch": 1.5051811511988502, "grad_norm": 0.09994587302207947, "learning_rate": 0.00016022, "loss": 0.0298, "step": 9950 }, { "epoch": 1.506693896074427, "grad_norm": 0.0781632736325264, "learning_rate": 0.00016018, "loss": 0.0299, "step": 9960 }, { "epoch": 1.5082066409500037, "grad_norm": 0.09286709874868393, "learning_rate": 0.00016014, "loss": 0.0334, "step": 9970 }, { "epoch": 1.5097193858255804, "grad_norm": 0.08658807724714279, "learning_rate": 0.00016010000000000002, "loss": 0.032, "step": 9980 }, { "epoch": 1.5112321307011571, "grad_norm": 0.09535326808691025, "learning_rate": 0.00016006, "loss": 0.032, "step": 9990 }, { "epoch": 1.5127448755767339, "grad_norm": 0.056372299790382385, "learning_rate": 0.00016002, "loss": 0.033, "step": 10000 }, { "epoch": 1.5127448755767339, "eval_cer": 0.1808933296766016, "eval_loss": 0.03580623120069504, "eval_runtime": 10388.4948, "eval_samples_per_second": 2.026, "eval_steps_per_second": 0.253, "step": 10000 } ], "logging_steps": 10, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.622822387689695e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }