|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5127448755767339, |
|
"eval_steps": 200, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001512744875576734, |
|
"grad_norm": 0.8282566070556641, |
|
"learning_rate": 0.00019996, |
|
"loss": 3.4576, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003025489751153468, |
|
"grad_norm": 0.1628154069185257, |
|
"learning_rate": 0.00019992000000000002, |
|
"loss": 0.0992, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004538234626730202, |
|
"grad_norm": 0.17421123385429382, |
|
"learning_rate": 0.00019988, |
|
"loss": 0.0666, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.006050979502306936, |
|
"grad_norm": 0.08850277960300446, |
|
"learning_rate": 0.00019984, |
|
"loss": 0.0661, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00756372437788367, |
|
"grad_norm": 0.11368270963430405, |
|
"learning_rate": 0.0001998, |
|
"loss": 0.0639, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.009076469253460404, |
|
"grad_norm": 0.12990300357341766, |
|
"learning_rate": 0.00019976000000000003, |
|
"loss": 0.0617, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.010589214129037138, |
|
"grad_norm": 0.08885369449853897, |
|
"learning_rate": 0.00019972000000000002, |
|
"loss": 0.0643, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.012101959004613872, |
|
"grad_norm": 0.07073435187339783, |
|
"learning_rate": 0.00019968, |
|
"loss": 0.0629, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.013614703880190605, |
|
"grad_norm": 0.061856113374233246, |
|
"learning_rate": 0.00019964, |
|
"loss": 0.061, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01512744875576734, |
|
"grad_norm": 0.06827201694250107, |
|
"learning_rate": 0.0001996, |
|
"loss": 0.0586, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016640193631344075, |
|
"grad_norm": 0.07220456004142761, |
|
"learning_rate": 0.00019956000000000002, |
|
"loss": 0.055, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.018152938506920808, |
|
"grad_norm": 0.06632555276155472, |
|
"learning_rate": 0.00019952000000000001, |
|
"loss": 0.0586, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01966568338249754, |
|
"grad_norm": 0.09966724365949631, |
|
"learning_rate": 0.00019948, |
|
"loss": 0.0621, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.021178428258074276, |
|
"grad_norm": 0.0833888053894043, |
|
"learning_rate": 0.00019944, |
|
"loss": 0.0591, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02269117313365101, |
|
"grad_norm": 0.08170727640390396, |
|
"learning_rate": 0.00019940000000000002, |
|
"loss": 0.055, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.024203918009227745, |
|
"grad_norm": 0.07089231163263321, |
|
"learning_rate": 0.00019936000000000002, |
|
"loss": 0.0582, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.025716662884804477, |
|
"grad_norm": 0.09390200674533844, |
|
"learning_rate": 0.00019932, |
|
"loss": 0.0628, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02722940776038121, |
|
"grad_norm": 0.06722863018512726, |
|
"learning_rate": 0.00019928, |
|
"loss": 0.0591, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.028742152635957946, |
|
"grad_norm": 0.0743609368801117, |
|
"learning_rate": 0.00019924, |
|
"loss": 0.0626, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03025489751153468, |
|
"grad_norm": 0.08125407248735428, |
|
"learning_rate": 0.00019920000000000002, |
|
"loss": 0.0601, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03025489751153468, |
|
"eval_cer": 0.5356160728183765, |
|
"eval_loss": 0.05078176036477089, |
|
"eval_runtime": 10281.8657, |
|
"eval_samples_per_second": 2.047, |
|
"eval_steps_per_second": 0.256, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03176764238711141, |
|
"grad_norm": 0.07030890136957169, |
|
"learning_rate": 0.00019916, |
|
"loss": 0.0597, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03328038726268815, |
|
"grad_norm": 0.05290469154715538, |
|
"learning_rate": 0.00019912, |
|
"loss": 0.0587, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03479313213826488, |
|
"grad_norm": 0.07339277863502502, |
|
"learning_rate": 0.00019908, |
|
"loss": 0.0529, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.036305877013841616, |
|
"grad_norm": 0.0727711170911789, |
|
"learning_rate": 0.00019904, |
|
"loss": 0.0539, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03781862188941835, |
|
"grad_norm": 0.07383541762828827, |
|
"learning_rate": 0.000199, |
|
"loss": 0.0532, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03933136676499508, |
|
"grad_norm": 0.07042526453733444, |
|
"learning_rate": 0.00019896, |
|
"loss": 0.0571, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04084411164057182, |
|
"grad_norm": 0.08188482373952866, |
|
"learning_rate": 0.00019892000000000003, |
|
"loss": 0.0521, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04235685651614855, |
|
"grad_norm": 0.07334589958190918, |
|
"learning_rate": 0.00019888, |
|
"loss": 0.0532, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.043869601391725285, |
|
"grad_norm": 0.06326377391815186, |
|
"learning_rate": 0.00019884000000000001, |
|
"loss": 0.0528, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.04538234626730202, |
|
"grad_norm": 0.05303795263171196, |
|
"learning_rate": 0.0001988, |
|
"loss": 0.0539, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04689509114287875, |
|
"grad_norm": 0.058723289519548416, |
|
"learning_rate": 0.00019876, |
|
"loss": 0.0469, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04840783601845549, |
|
"grad_norm": 0.08683237433433533, |
|
"learning_rate": 0.00019872000000000002, |
|
"loss": 0.0601, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04992058089403222, |
|
"grad_norm": 0.07650341093540192, |
|
"learning_rate": 0.00019868, |
|
"loss": 0.0582, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.051433325769608955, |
|
"grad_norm": 0.054965659976005554, |
|
"learning_rate": 0.00019864, |
|
"loss": 0.0548, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05294607064518569, |
|
"grad_norm": 0.06949716061353683, |
|
"learning_rate": 0.0001986, |
|
"loss": 0.0581, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05445881552076242, |
|
"grad_norm": 0.10514732450246811, |
|
"learning_rate": 0.00019856000000000002, |
|
"loss": 0.0587, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.05597156039633916, |
|
"grad_norm": 0.06586117297410965, |
|
"learning_rate": 0.00019852000000000002, |
|
"loss": 0.0561, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.05748430527191589, |
|
"grad_norm": 0.09821395576000214, |
|
"learning_rate": 0.00019848, |
|
"loss": 0.0556, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.058997050147492625, |
|
"grad_norm": 0.06488014757633209, |
|
"learning_rate": 0.00019844, |
|
"loss": 0.0634, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06050979502306936, |
|
"grad_norm": 0.06910958141088486, |
|
"learning_rate": 0.0001984, |
|
"loss": 0.052, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06050979502306936, |
|
"eval_cer": 0.2714758865721352, |
|
"eval_loss": 0.04847713187336922, |
|
"eval_runtime": 10484.76, |
|
"eval_samples_per_second": 2.008, |
|
"eval_steps_per_second": 0.251, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0620225398986461, |
|
"grad_norm": 0.048563435673713684, |
|
"learning_rate": 0.00019836000000000002, |
|
"loss": 0.0565, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06353528477422282, |
|
"grad_norm": 0.055841896682977676, |
|
"learning_rate": 0.00019832, |
|
"loss": 0.0547, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.06504802964979955, |
|
"grad_norm": 0.05644605681300163, |
|
"learning_rate": 0.00019828, |
|
"loss": 0.0575, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.0665607745253763, |
|
"grad_norm": 0.05617703124880791, |
|
"learning_rate": 0.00019824, |
|
"loss": 0.0514, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.06807351940095303, |
|
"grad_norm": 0.11480820178985596, |
|
"learning_rate": 0.00019820000000000002, |
|
"loss": 0.0562, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.06958626427652977, |
|
"grad_norm": 0.06004955247044563, |
|
"learning_rate": 0.00019816000000000001, |
|
"loss": 0.0575, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0710990091521065, |
|
"grad_norm": 0.07830873131752014, |
|
"learning_rate": 0.00019812, |
|
"loss": 0.0621, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.07261175402768323, |
|
"grad_norm": 0.052650969475507736, |
|
"learning_rate": 0.00019808, |
|
"loss": 0.0599, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.07412449890325996, |
|
"grad_norm": 0.09298545122146606, |
|
"learning_rate": 0.00019804, |
|
"loss": 0.0559, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.0756372437788367, |
|
"grad_norm": 0.06198689714074135, |
|
"learning_rate": 0.00019800000000000002, |
|
"loss": 0.047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07714998865441343, |
|
"grad_norm": 0.06688915193080902, |
|
"learning_rate": 0.00019796, |
|
"loss": 0.0523, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.07866273352999016, |
|
"grad_norm": 0.06676903367042542, |
|
"learning_rate": 0.00019792000000000003, |
|
"loss": 0.0509, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08017547840556691, |
|
"grad_norm": 0.06219707056879997, |
|
"learning_rate": 0.00019788, |
|
"loss": 0.0553, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.08168822328114364, |
|
"grad_norm": 0.07905440032482147, |
|
"learning_rate": 0.00019784, |
|
"loss": 0.0506, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.08320096815672037, |
|
"grad_norm": 0.08591905236244202, |
|
"learning_rate": 0.0001978, |
|
"loss": 0.0603, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0847137130322971, |
|
"grad_norm": 0.05921874940395355, |
|
"learning_rate": 0.00019776, |
|
"loss": 0.0562, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.08622645790787384, |
|
"grad_norm": 0.058868613094091415, |
|
"learning_rate": 0.00019772000000000002, |
|
"loss": 0.0517, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.08773920278345057, |
|
"grad_norm": 0.06818246096372604, |
|
"learning_rate": 0.00019768, |
|
"loss": 0.0478, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0892519476590273, |
|
"grad_norm": 0.07364825904369354, |
|
"learning_rate": 0.00019764, |
|
"loss": 0.0553, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.09076469253460404, |
|
"grad_norm": 0.07647281885147095, |
|
"learning_rate": 0.0001976, |
|
"loss": 0.0527, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09076469253460404, |
|
"eval_cer": 0.282631389088609, |
|
"eval_loss": 0.047340717166662216, |
|
"eval_runtime": 10466.4392, |
|
"eval_samples_per_second": 2.011, |
|
"eval_steps_per_second": 0.251, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09227743741018077, |
|
"grad_norm": 0.0819125548005104, |
|
"learning_rate": 0.00019756, |
|
"loss": 0.0509, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.0937901822857575, |
|
"grad_norm": 0.06566735357046127, |
|
"learning_rate": 0.00019752000000000002, |
|
"loss": 0.0583, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.09530292716133425, |
|
"grad_norm": 0.06856215745210648, |
|
"learning_rate": 0.00019748, |
|
"loss": 0.0465, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.09681567203691098, |
|
"grad_norm": 0.06130633130669594, |
|
"learning_rate": 0.00019744, |
|
"loss": 0.0509, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.09832841691248771, |
|
"grad_norm": 0.08208902925252914, |
|
"learning_rate": 0.0001974, |
|
"loss": 0.0549, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.09984116178806444, |
|
"grad_norm": 0.08106379210948944, |
|
"learning_rate": 0.00019736000000000002, |
|
"loss": 0.0584, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.10135390666364118, |
|
"grad_norm": 0.08364614844322205, |
|
"learning_rate": 0.00019732000000000001, |
|
"loss": 0.0543, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.10286665153921791, |
|
"grad_norm": 0.06432674080133438, |
|
"learning_rate": 0.00019728, |
|
"loss": 0.0535, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.10437939641479464, |
|
"grad_norm": 0.07217614352703094, |
|
"learning_rate": 0.00019724, |
|
"loss": 0.0521, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.10589214129037137, |
|
"grad_norm": 0.06074230372905731, |
|
"learning_rate": 0.0001972, |
|
"loss": 0.0545, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10740488616594811, |
|
"grad_norm": 0.04888018220663071, |
|
"learning_rate": 0.00019716000000000002, |
|
"loss": 0.0445, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.10891763104152484, |
|
"grad_norm": 0.07705683261156082, |
|
"learning_rate": 0.00019712, |
|
"loss": 0.0491, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.11043037591710159, |
|
"grad_norm": 0.06741231679916382, |
|
"learning_rate": 0.00019708000000000003, |
|
"loss": 0.053, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.11194312079267832, |
|
"grad_norm": 0.0673738569021225, |
|
"learning_rate": 0.00019704, |
|
"loss": 0.0473, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.11345586566825505, |
|
"grad_norm": 0.06236235797405243, |
|
"learning_rate": 0.00019700000000000002, |
|
"loss": 0.0538, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.11496861054383178, |
|
"grad_norm": 0.0538531057536602, |
|
"learning_rate": 0.00019696, |
|
"loss": 0.0414, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.11648135541940852, |
|
"grad_norm": 0.09818791598081589, |
|
"learning_rate": 0.00019692, |
|
"loss": 0.0551, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.11799410029498525, |
|
"grad_norm": 0.06459952145814896, |
|
"learning_rate": 0.00019688000000000003, |
|
"loss": 0.0543, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.11950684517056198, |
|
"grad_norm": 0.09495878219604492, |
|
"learning_rate": 0.00019684, |
|
"loss": 0.0566, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.12101959004613871, |
|
"grad_norm": 0.06249309703707695, |
|
"learning_rate": 0.0001968, |
|
"loss": 0.0492, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12101959004613871, |
|
"eval_cer": 0.0030890735373690806, |
|
"eval_loss": 0.046879783272743225, |
|
"eval_runtime": 10443.0859, |
|
"eval_samples_per_second": 2.016, |
|
"eval_steps_per_second": 0.252, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12253233492171545, |
|
"grad_norm": 0.06483816355466843, |
|
"learning_rate": 0.00019676, |
|
"loss": 0.048, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1240450797972922, |
|
"grad_norm": 0.05618014931678772, |
|
"learning_rate": 0.00019672000000000003, |
|
"loss": 0.0484, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1255578246728689, |
|
"grad_norm": 0.07441507279872894, |
|
"learning_rate": 0.00019668000000000002, |
|
"loss": 0.0548, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.12707056954844564, |
|
"grad_norm": 0.05274181067943573, |
|
"learning_rate": 0.00019664000000000001, |
|
"loss": 0.0619, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.12858331442402238, |
|
"grad_norm": 0.06264190375804901, |
|
"learning_rate": 0.0001966, |
|
"loss": 0.0525, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1300960592995991, |
|
"grad_norm": 0.07662319391965866, |
|
"learning_rate": 0.00019656, |
|
"loss": 0.0532, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.13160880417517587, |
|
"grad_norm": 0.06203316152095795, |
|
"learning_rate": 0.00019652000000000002, |
|
"loss": 0.0525, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.1331215490507526, |
|
"grad_norm": 0.1326906681060791, |
|
"learning_rate": 0.00019648000000000002, |
|
"loss": 0.0539, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.13463429392632933, |
|
"grad_norm": 0.10350421816110611, |
|
"learning_rate": 0.00019644, |
|
"loss": 0.0556, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.13614703880190607, |
|
"grad_norm": 0.049543242901563644, |
|
"learning_rate": 0.0001964, |
|
"loss": 0.0482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1376597836774828, |
|
"grad_norm": 0.11776097118854523, |
|
"learning_rate": 0.00019636000000000002, |
|
"loss": 0.0538, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.13917252855305953, |
|
"grad_norm": 0.05535553768277168, |
|
"learning_rate": 0.00019632000000000002, |
|
"loss": 0.052, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.14068527342863626, |
|
"grad_norm": 0.05945896357297897, |
|
"learning_rate": 0.00019628, |
|
"loss": 0.0491, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.142198018304213, |
|
"grad_norm": 0.1228972002863884, |
|
"learning_rate": 0.00019624, |
|
"loss": 0.0511, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.14371076317978973, |
|
"grad_norm": 0.08868791162967682, |
|
"learning_rate": 0.0001962, |
|
"loss": 0.057, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.14522350805536646, |
|
"grad_norm": 0.07960449159145355, |
|
"learning_rate": 0.00019616000000000002, |
|
"loss": 0.0514, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1467362529309432, |
|
"grad_norm": 0.06392108649015427, |
|
"learning_rate": 0.00019612, |
|
"loss": 0.0558, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.14824899780651993, |
|
"grad_norm": 0.07048727571964264, |
|
"learning_rate": 0.00019608, |
|
"loss": 0.053, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.14976174268209666, |
|
"grad_norm": 0.10491488873958588, |
|
"learning_rate": 0.00019604, |
|
"loss": 0.0489, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1512744875576734, |
|
"grad_norm": 0.059835776686668396, |
|
"learning_rate": 0.000196, |
|
"loss": 0.0474, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1512744875576734, |
|
"eval_cer": 0.4367181574025345, |
|
"eval_loss": 0.04569260776042938, |
|
"eval_runtime": 10457.5718, |
|
"eval_samples_per_second": 2.013, |
|
"eval_steps_per_second": 0.252, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15278723243325013, |
|
"grad_norm": 0.07570289075374603, |
|
"learning_rate": 0.00019596000000000001, |
|
"loss": 0.0522, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.15429997730882686, |
|
"grad_norm": 0.09082864969968796, |
|
"learning_rate": 0.00019592, |
|
"loss": 0.0516, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.1558127221844036, |
|
"grad_norm": 0.06894449889659882, |
|
"learning_rate": 0.00019588000000000003, |
|
"loss": 0.0489, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.15732546705998032, |
|
"grad_norm": 0.05989064276218414, |
|
"learning_rate": 0.00019584, |
|
"loss": 0.0514, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.15883821193555706, |
|
"grad_norm": 0.060047443956136703, |
|
"learning_rate": 0.00019580000000000002, |
|
"loss": 0.047, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.16035095681113382, |
|
"grad_norm": 0.06459174305200577, |
|
"learning_rate": 0.00019576, |
|
"loss": 0.0532, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.16186370168671055, |
|
"grad_norm": 0.061583805829286575, |
|
"learning_rate": 0.00019572, |
|
"loss": 0.0485, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.16337644656228728, |
|
"grad_norm": 0.060534268617630005, |
|
"learning_rate": 0.00019568000000000002, |
|
"loss": 0.0468, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.164889191437864, |
|
"grad_norm": 0.06731607764959335, |
|
"learning_rate": 0.00019564, |
|
"loss": 0.0481, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.16640193631344075, |
|
"grad_norm": 0.0757998675107956, |
|
"learning_rate": 0.0001956, |
|
"loss": 0.056, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.16791468118901748, |
|
"grad_norm": 0.08009450882673264, |
|
"learning_rate": 0.00019556, |
|
"loss": 0.0523, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.1694274260645942, |
|
"grad_norm": 2.663090944290161, |
|
"learning_rate": 0.00019552000000000003, |
|
"loss": 0.1404, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"grad_norm": 14.877944946289062, |
|
"learning_rate": 0.00019548000000000002, |
|
"loss": 0.1442, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.17245291581574768, |
|
"grad_norm": 2.8173887729644775, |
|
"learning_rate": 0.000195448, |
|
"loss": 0.5461, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.1739656606913244, |
|
"grad_norm": 9.367515563964844, |
|
"learning_rate": 0.00019540800000000002, |
|
"loss": 0.2832, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.17547840556690114, |
|
"grad_norm": 0.34991636872291565, |
|
"learning_rate": 0.00019536800000000002, |
|
"loss": 0.1497, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 0.10464385151863098, |
|
"learning_rate": 0.000195328, |
|
"loss": 0.0686, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.1785038953180546, |
|
"grad_norm": 0.8961012363433838, |
|
"learning_rate": 0.000195288, |
|
"loss": 0.0822, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.18001664019363134, |
|
"grad_norm": 8.467473983764648, |
|
"learning_rate": 0.000195248, |
|
"loss": 0.0949, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.18152938506920807, |
|
"grad_norm": 0.08059060573577881, |
|
"learning_rate": 0.00019520800000000002, |
|
"loss": 0.0552, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18152938506920807, |
|
"eval_cer": 0.0833932493767496, |
|
"eval_loss": 0.04637393727898598, |
|
"eval_runtime": 10459.5021, |
|
"eval_samples_per_second": 2.013, |
|
"eval_steps_per_second": 0.252, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1830421299447848, |
|
"grad_norm": 0.08795847743749619, |
|
"learning_rate": 0.000195168, |
|
"loss": 0.055, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.18455487482036154, |
|
"grad_norm": 0.10272721946239471, |
|
"learning_rate": 0.000195128, |
|
"loss": 0.0557, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.18606761969593827, |
|
"grad_norm": 0.23404774069786072, |
|
"learning_rate": 0.000195088, |
|
"loss": 0.0611, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.187580364571515, |
|
"grad_norm": 0.2968621253967285, |
|
"learning_rate": 0.00019504800000000002, |
|
"loss": 0.0817, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.18909310944709176, |
|
"grad_norm": 0.08634278923273087, |
|
"learning_rate": 0.00019500800000000001, |
|
"loss": 0.0685, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1906058543226685, |
|
"grad_norm": 0.11241244524717331, |
|
"learning_rate": 0.000194968, |
|
"loss": 0.0563, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.19211859919824523, |
|
"grad_norm": 0.17380298674106598, |
|
"learning_rate": 0.000194928, |
|
"loss": 0.065, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.19363134407382196, |
|
"grad_norm": 0.13615791499614716, |
|
"learning_rate": 0.000194888, |
|
"loss": 0.0667, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.1951440889493987, |
|
"grad_norm": 0.0854301005601883, |
|
"learning_rate": 0.00019484800000000002, |
|
"loss": 0.0507, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.19665683382497542, |
|
"grad_norm": 0.08915933966636658, |
|
"learning_rate": 0.000194808, |
|
"loss": 0.0561, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.19816957870055216, |
|
"grad_norm": 0.09583040326833725, |
|
"learning_rate": 0.00019476800000000003, |
|
"loss": 0.0514, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.1996823235761289, |
|
"grad_norm": 0.09624961763620377, |
|
"learning_rate": 0.000194728, |
|
"loss": 0.052, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.20119506845170562, |
|
"grad_norm": 0.05612370744347572, |
|
"learning_rate": 0.00019468800000000002, |
|
"loss": 0.0471, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.20270781332728235, |
|
"grad_norm": 0.0653730109333992, |
|
"learning_rate": 0.000194648, |
|
"loss": 0.0521, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.2042205582028591, |
|
"grad_norm": 0.07432978600263596, |
|
"learning_rate": 0.000194608, |
|
"loss": 0.0577, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.20573330307843582, |
|
"grad_norm": 0.05863150209188461, |
|
"learning_rate": 0.00019456800000000003, |
|
"loss": 0.0435, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.20724604795401255, |
|
"grad_norm": 0.056969739496707916, |
|
"learning_rate": 0.000194528, |
|
"loss": 0.0502, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.20875879282958928, |
|
"grad_norm": 0.10658754408359528, |
|
"learning_rate": 0.000194488, |
|
"loss": 0.0469, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.21027153770516602, |
|
"grad_norm": 0.06535681337118149, |
|
"learning_rate": 0.000194448, |
|
"loss": 0.0519, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.21178428258074275, |
|
"grad_norm": 0.08987314254045486, |
|
"learning_rate": 0.000194408, |
|
"loss": 0.0482, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.21178428258074275, |
|
"eval_cer": 0.14607469615771385, |
|
"eval_loss": 0.04351452365517616, |
|
"eval_runtime": 10473.9712, |
|
"eval_samples_per_second": 2.01, |
|
"eval_steps_per_second": 0.251, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.21329702745631948, |
|
"grad_norm": 0.09238473325967789, |
|
"learning_rate": 0.00019436800000000002, |
|
"loss": 0.0483, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.21480977233189621, |
|
"grad_norm": 0.10443761199712753, |
|
"learning_rate": 0.000194328, |
|
"loss": 0.054, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.21632251720747295, |
|
"grad_norm": 0.0742131844162941, |
|
"learning_rate": 0.000194288, |
|
"loss": 0.0507, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.21783526208304968, |
|
"grad_norm": 0.09358492493629456, |
|
"learning_rate": 0.000194248, |
|
"loss": 0.0496, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.21934800695862644, |
|
"grad_norm": 0.07695715129375458, |
|
"learning_rate": 0.00019420800000000002, |
|
"loss": 0.046, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.22086075183420317, |
|
"grad_norm": 0.07772234827280045, |
|
"learning_rate": 0.00019416800000000002, |
|
"loss": 0.0468, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2223734967097799, |
|
"grad_norm": 0.04500894993543625, |
|
"learning_rate": 0.000194128, |
|
"loss": 0.0428, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.22388624158535664, |
|
"grad_norm": 0.08258084207773209, |
|
"learning_rate": 0.000194088, |
|
"loss": 0.0542, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.22539898646093337, |
|
"grad_norm": 0.06530752032995224, |
|
"learning_rate": 0.000194048, |
|
"loss": 0.0477, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.2269117313365101, |
|
"grad_norm": 0.06770725548267365, |
|
"learning_rate": 0.00019400800000000002, |
|
"loss": 0.052, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22842447621208684, |
|
"grad_norm": 0.04499737173318863, |
|
"learning_rate": 0.000193968, |
|
"loss": 0.0392, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.22993722108766357, |
|
"grad_norm": 0.0594199039041996, |
|
"learning_rate": 0.000193928, |
|
"loss": 0.0469, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.2314499659632403, |
|
"grad_norm": 0.05143499746918678, |
|
"learning_rate": 0.000193888, |
|
"loss": 0.0384, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.23296271083881703, |
|
"grad_norm": 0.05464276298880577, |
|
"learning_rate": 0.00019384800000000002, |
|
"loss": 0.0479, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.23447545571439377, |
|
"grad_norm": 0.0698809027671814, |
|
"learning_rate": 0.000193808, |
|
"loss": 0.0493, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.2359882005899705, |
|
"grad_norm": 0.059237249195575714, |
|
"learning_rate": 0.000193768, |
|
"loss": 0.0493, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.23750094546554723, |
|
"grad_norm": 0.08654357492923737, |
|
"learning_rate": 0.000193728, |
|
"loss": 0.0481, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.23901369034112396, |
|
"grad_norm": 0.19063305854797363, |
|
"learning_rate": 0.000193688, |
|
"loss": 0.051, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.2405264352167007, |
|
"grad_norm": 0.08095410466194153, |
|
"learning_rate": 0.000193648, |
|
"loss": 0.0447, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.24203918009227743, |
|
"grad_norm": 0.056007932871580124, |
|
"learning_rate": 0.000193608, |
|
"loss": 0.0431, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.24203918009227743, |
|
"eval_cer": 0.1667197881072213, |
|
"eval_loss": 0.04373455420136452, |
|
"eval_runtime": 10595.1515, |
|
"eval_samples_per_second": 1.987, |
|
"eval_steps_per_second": 0.248, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.24355192496785416, |
|
"grad_norm": 0.06981740891933441, |
|
"learning_rate": 0.00019356800000000003, |
|
"loss": 0.0442, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.2450646698434309, |
|
"grad_norm": 0.10189545899629593, |
|
"learning_rate": 0.000193528, |
|
"loss": 0.0477, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.24657741471900763, |
|
"grad_norm": 0.06565351039171219, |
|
"learning_rate": 0.00019348800000000002, |
|
"loss": 0.0532, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.2480901595945844, |
|
"grad_norm": 0.06872796267271042, |
|
"learning_rate": 0.000193448, |
|
"loss": 0.0472, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.24960290447016112, |
|
"grad_norm": 0.06040889397263527, |
|
"learning_rate": 0.000193408, |
|
"loss": 0.0463, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2511156493457378, |
|
"grad_norm": 0.08789139986038208, |
|
"learning_rate": 0.00019336800000000002, |
|
"loss": 0.0495, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.25262839422131456, |
|
"grad_norm": 0.0869157686829567, |
|
"learning_rate": 0.00019332800000000002, |
|
"loss": 0.0491, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.2541411390968913, |
|
"grad_norm": 0.06886725127696991, |
|
"learning_rate": 0.000193288, |
|
"loss": 0.0508, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.255653883972468, |
|
"grad_norm": 0.06138046458363533, |
|
"learning_rate": 0.000193248, |
|
"loss": 0.0435, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.25716662884804475, |
|
"grad_norm": 0.05554139241576195, |
|
"learning_rate": 0.00019320800000000002, |
|
"loss": 0.0483, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2586793737236215, |
|
"grad_norm": 0.06712419539690018, |
|
"learning_rate": 0.00019316800000000002, |
|
"loss": 0.0545, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.2601921185991982, |
|
"grad_norm": 0.07289120554924011, |
|
"learning_rate": 0.000193128, |
|
"loss": 0.0481, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.261704863474775, |
|
"grad_norm": 0.07003842294216156, |
|
"learning_rate": 0.000193088, |
|
"loss": 0.0493, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.26321760835035174, |
|
"grad_norm": 0.06333723664283752, |
|
"learning_rate": 0.000193048, |
|
"loss": 0.0536, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.26473035322592847, |
|
"grad_norm": 0.0609460324048996, |
|
"learning_rate": 0.00019300800000000002, |
|
"loss": 0.0516, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2662430981015052, |
|
"grad_norm": 0.14176234602928162, |
|
"learning_rate": 0.000192968, |
|
"loss": 0.0522, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.26775584297708194, |
|
"grad_norm": 0.09526730328798294, |
|
"learning_rate": 0.000192928, |
|
"loss": 0.0468, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.26926858785265867, |
|
"grad_norm": 0.05794398859143257, |
|
"learning_rate": 0.000192888, |
|
"loss": 0.051, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2707813327282354, |
|
"grad_norm": 0.07408788055181503, |
|
"learning_rate": 0.000192848, |
|
"loss": 0.0482, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.27229407760381213, |
|
"grad_norm": 0.07873456180095673, |
|
"learning_rate": 0.00019280800000000001, |
|
"loss": 0.0576, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.27229407760381213, |
|
"eval_cer": 0.28151275038111545, |
|
"eval_loss": 0.042666129767894745, |
|
"eval_runtime": 10460.0372, |
|
"eval_samples_per_second": 2.013, |
|
"eval_steps_per_second": 0.252, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.27380682247938887, |
|
"grad_norm": 0.06786733120679855, |
|
"learning_rate": 0.000192768, |
|
"loss": 0.0505, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.2753195673549656, |
|
"grad_norm": 0.090096116065979, |
|
"learning_rate": 0.00019272800000000003, |
|
"loss": 0.0458, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.27683231223054233, |
|
"grad_norm": 0.058033574372529984, |
|
"learning_rate": 0.000192688, |
|
"loss": 0.0415, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.27834505710611906, |
|
"grad_norm": 0.09522871673107147, |
|
"learning_rate": 0.00019264800000000002, |
|
"loss": 0.0456, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.2798578019816958, |
|
"grad_norm": 0.06533698737621307, |
|
"learning_rate": 0.000192608, |
|
"loss": 0.045, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.28137054685727253, |
|
"grad_norm": 0.07162319868803024, |
|
"learning_rate": 0.000192568, |
|
"loss": 0.0511, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.28288329173284926, |
|
"grad_norm": 0.06015852093696594, |
|
"learning_rate": 0.00019252800000000002, |
|
"loss": 0.0453, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.284396036608426, |
|
"grad_norm": 0.0789792612195015, |
|
"learning_rate": 0.000192488, |
|
"loss": 0.0498, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.2859087814840027, |
|
"grad_norm": 0.05619093030691147, |
|
"learning_rate": 0.000192448, |
|
"loss": 0.0454, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.28742152635957946, |
|
"grad_norm": 0.061943668872117996, |
|
"learning_rate": 0.000192408, |
|
"loss": 0.0496, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2889342712351562, |
|
"grad_norm": 0.07192958891391754, |
|
"learning_rate": 0.00019236800000000003, |
|
"loss": 0.05, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.2904470161107329, |
|
"grad_norm": 0.07053862512111664, |
|
"learning_rate": 0.00019232800000000002, |
|
"loss": 0.0504, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.29195976098630966, |
|
"grad_norm": 0.06491555273532867, |
|
"learning_rate": 0.000192288, |
|
"loss": 0.0478, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.2934725058618864, |
|
"grad_norm": 0.06389233469963074, |
|
"learning_rate": 0.000192248, |
|
"loss": 0.0469, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.2949852507374631, |
|
"grad_norm": 0.06336333602666855, |
|
"learning_rate": 0.000192208, |
|
"loss": 0.0472, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.29649799561303986, |
|
"grad_norm": 0.06351201981306076, |
|
"learning_rate": 0.00019216800000000002, |
|
"loss": 0.0459, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.2980107404886166, |
|
"grad_norm": 0.0773550271987915, |
|
"learning_rate": 0.00019212800000000001, |
|
"loss": 0.0435, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.2995234853641933, |
|
"grad_norm": 0.07999245822429657, |
|
"learning_rate": 0.000192088, |
|
"loss": 0.051, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.30103623023977005, |
|
"grad_norm": 0.05664638802409172, |
|
"learning_rate": 0.000192048, |
|
"loss": 0.0493, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.3025489751153468, |
|
"grad_norm": 0.050149012356996536, |
|
"learning_rate": 0.00019200800000000002, |
|
"loss": 0.0491, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3025489751153468, |
|
"eval_cer": 0.10787543886957575, |
|
"eval_loss": 0.042158011347055435, |
|
"eval_runtime": 10458.1763, |
|
"eval_samples_per_second": 2.013, |
|
"eval_steps_per_second": 0.252, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3040617199909235, |
|
"grad_norm": 0.06383787840604782, |
|
"learning_rate": 0.00019196800000000002, |
|
"loss": 0.0421, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.30557446486650025, |
|
"grad_norm": 0.05740641430020332, |
|
"learning_rate": 0.000191928, |
|
"loss": 0.0499, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.307087209742077, |
|
"grad_norm": 0.07163075357675552, |
|
"learning_rate": 0.000191888, |
|
"loss": 0.0431, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.3085999546176537, |
|
"grad_norm": 0.05976075306534767, |
|
"learning_rate": 0.000191848, |
|
"loss": 0.0476, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.31011269949323045, |
|
"grad_norm": 0.0871894434094429, |
|
"learning_rate": 0.00019180800000000002, |
|
"loss": 0.0449, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3116254443688072, |
|
"grad_norm": 0.07474277913570404, |
|
"learning_rate": 0.000191768, |
|
"loss": 0.0422, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.3131381892443839, |
|
"grad_norm": 0.05594407767057419, |
|
"learning_rate": 0.00019172800000000003, |
|
"loss": 0.0479, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.31465093411996065, |
|
"grad_norm": 0.06565164029598236, |
|
"learning_rate": 0.000191688, |
|
"loss": 0.0501, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.3161636789955374, |
|
"grad_norm": 0.07224603742361069, |
|
"learning_rate": 0.000191648, |
|
"loss": 0.0474, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.3176764238711141, |
|
"grad_norm": 0.07781083881855011, |
|
"learning_rate": 0.000191608, |
|
"loss": 0.0401, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.31918916874669084, |
|
"grad_norm": 0.08147955685853958, |
|
"learning_rate": 0.000191568, |
|
"loss": 0.0486, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.32070191362226763, |
|
"grad_norm": 0.05572337657213211, |
|
"learning_rate": 0.00019152800000000003, |
|
"loss": 0.0488, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.32221465849784436, |
|
"grad_norm": 0.06601813435554504, |
|
"learning_rate": 0.000191488, |
|
"loss": 0.0466, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.3237274033734211, |
|
"grad_norm": 0.057904861867427826, |
|
"learning_rate": 0.00019144800000000001, |
|
"loss": 0.0479, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.32524014824899783, |
|
"grad_norm": 0.057231709361076355, |
|
"learning_rate": 0.000191408, |
|
"loss": 0.0522, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.32675289312457456, |
|
"grad_norm": 0.08306867629289627, |
|
"learning_rate": 0.000191368, |
|
"loss": 0.0439, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.3282656380001513, |
|
"grad_norm": 0.0742512047290802, |
|
"learning_rate": 0.00019132800000000002, |
|
"loss": 0.0434, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.329778382875728, |
|
"grad_norm": 0.07260335236787796, |
|
"learning_rate": 0.000191288, |
|
"loss": 0.0505, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.33129112775130476, |
|
"grad_norm": 0.07398936152458191, |
|
"learning_rate": 0.000191248, |
|
"loss": 0.0519, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.3328038726268815, |
|
"grad_norm": 0.069728784263134, |
|
"learning_rate": 0.000191208, |
|
"loss": 0.0501, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3328038726268815, |
|
"eval_cer": 0.07287520414693144, |
|
"eval_loss": 0.041937489062547684, |
|
"eval_runtime": 10449.7877, |
|
"eval_samples_per_second": 2.015, |
|
"eval_steps_per_second": 0.252, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3343166175024582, |
|
"grad_norm": 0.07778773456811905, |
|
"learning_rate": 0.00019116800000000002, |
|
"loss": 0.0485, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.33582936237803496, |
|
"grad_norm": 0.08489017933607101, |
|
"learning_rate": 0.00019112800000000002, |
|
"loss": 0.047, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.3373421072536117, |
|
"grad_norm": 0.0746629610657692, |
|
"learning_rate": 0.000191088, |
|
"loss": 0.0444, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.3388548521291884, |
|
"grad_norm": 0.07858649641275406, |
|
"learning_rate": 0.000191048, |
|
"loss": 0.0537, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.34036759700476515, |
|
"grad_norm": 0.08357574045658112, |
|
"learning_rate": 0.000191008, |
|
"loss": 0.054, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 0.05976574867963791, |
|
"learning_rate": 0.00019096800000000002, |
|
"loss": 0.0465, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.3433930867559186, |
|
"grad_norm": 0.07549616694450378, |
|
"learning_rate": 0.000190928, |
|
"loss": 0.0479, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.34490583163149535, |
|
"grad_norm": 0.07128783315420151, |
|
"learning_rate": 0.000190888, |
|
"loss": 0.0481, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.3464185765070721, |
|
"grad_norm": 0.05093182995915413, |
|
"learning_rate": 0.000190848, |
|
"loss": 0.039, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.3479313213826488, |
|
"grad_norm": 0.07213055342435837, |
|
"learning_rate": 0.00019080800000000002, |
|
"loss": 0.0486, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.34944406625822555, |
|
"grad_norm": 0.08296896517276764, |
|
"learning_rate": 0.00019076800000000001, |
|
"loss": 0.0436, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.3509568111338023, |
|
"grad_norm": 0.05904708430171013, |
|
"learning_rate": 0.000190728, |
|
"loss": 0.0457, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.352469556009379, |
|
"grad_norm": 0.07709085941314697, |
|
"learning_rate": 0.000190688, |
|
"loss": 0.0456, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 0.061139535158872604, |
|
"learning_rate": 0.000190648, |
|
"loss": 0.0484, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.3554950457605325, |
|
"grad_norm": 0.11013538390398026, |
|
"learning_rate": 0.00019060800000000002, |
|
"loss": 0.0463, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3570077906361092, |
|
"grad_norm": 0.04920123890042305, |
|
"learning_rate": 0.000190568, |
|
"loss": 0.0404, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.35852053551168594, |
|
"grad_norm": 0.05916327238082886, |
|
"learning_rate": 0.00019052800000000003, |
|
"loss": 0.0506, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.3600332803872627, |
|
"grad_norm": 0.08169171214103699, |
|
"learning_rate": 0.000190488, |
|
"loss": 0.0422, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.3615460252628394, |
|
"grad_norm": 0.07195686548948288, |
|
"learning_rate": 0.00019044800000000002, |
|
"loss": 0.0476, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.36305877013841614, |
|
"grad_norm": 0.06132512912154198, |
|
"learning_rate": 0.000190408, |
|
"loss": 0.0451, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.36305877013841614, |
|
"eval_cer": 0.22885396051223894, |
|
"eval_loss": 0.04164993762969971, |
|
"eval_runtime": 10444.7845, |
|
"eval_samples_per_second": 2.016, |
|
"eval_steps_per_second": 0.252, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3645715150139929, |
|
"grad_norm": 0.06889329850673676, |
|
"learning_rate": 0.000190368, |
|
"loss": 0.0536, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.3660842598895696, |
|
"grad_norm": 0.06513672322034836, |
|
"learning_rate": 0.00019032800000000002, |
|
"loss": 0.0472, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.36759700476514634, |
|
"grad_norm": 0.06588304787874222, |
|
"learning_rate": 0.000190288, |
|
"loss": 0.046, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.3691097496407231, |
|
"grad_norm": 0.07162468135356903, |
|
"learning_rate": 0.000190248, |
|
"loss": 0.0444, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.3706224945162998, |
|
"grad_norm": 0.05831474810838699, |
|
"learning_rate": 0.000190208, |
|
"loss": 0.0448, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.37213523939187654, |
|
"grad_norm": 0.11214031279087067, |
|
"learning_rate": 0.000190168, |
|
"loss": 0.0491, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.37364798426745327, |
|
"grad_norm": 0.07672178000211716, |
|
"learning_rate": 0.00019012800000000002, |
|
"loss": 0.0489, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.37516072914303, |
|
"grad_norm": 0.07850979268550873, |
|
"learning_rate": 0.000190088, |
|
"loss": 0.047, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.37667347401860674, |
|
"grad_norm": 0.0473526194691658, |
|
"learning_rate": 0.000190048, |
|
"loss": 0.0436, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.3781862188941835, |
|
"grad_norm": 0.08313214778900146, |
|
"learning_rate": 0.000190008, |
|
"loss": 0.0457, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.37969896376976026, |
|
"grad_norm": 0.07851678878068924, |
|
"learning_rate": 0.00018996800000000002, |
|
"loss": 0.0399, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.381211708645337, |
|
"grad_norm": 0.06067463755607605, |
|
"learning_rate": 0.00018992800000000002, |
|
"loss": 0.0406, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.3827244535209137, |
|
"grad_norm": 0.07291869819164276, |
|
"learning_rate": 0.000189888, |
|
"loss": 0.0411, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.38423719839649045, |
|
"grad_norm": 0.05576318874955177, |
|
"learning_rate": 0.000189848, |
|
"loss": 0.0412, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.3857499432720672, |
|
"grad_norm": 0.05669853091239929, |
|
"learning_rate": 0.000189808, |
|
"loss": 0.0462, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3872626881476439, |
|
"grad_norm": 0.0653596743941307, |
|
"learning_rate": 0.00018976800000000002, |
|
"loss": 0.0504, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.38877543302322065, |
|
"grad_norm": 0.07938168197870255, |
|
"learning_rate": 0.000189728, |
|
"loss": 0.0423, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.3902881778987974, |
|
"grad_norm": 0.19600598514080048, |
|
"learning_rate": 0.000189688, |
|
"loss": 0.0422, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.3918009227743741, |
|
"grad_norm": 0.08753781765699387, |
|
"learning_rate": 0.000189648, |
|
"loss": 0.0485, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.39331366764995085, |
|
"grad_norm": 0.07059615105390549, |
|
"learning_rate": 0.00018960800000000002, |
|
"loss": 0.0441, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.39331366764995085, |
|
"eval_cer": 0.12797016798729038, |
|
"eval_loss": 0.040877681225538254, |
|
"eval_runtime": 10426.9488, |
|
"eval_samples_per_second": 2.019, |
|
"eval_steps_per_second": 0.252, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.3948264125255276, |
|
"grad_norm": 0.07426866888999939, |
|
"learning_rate": 0.000189568, |
|
"loss": 0.0456, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.3963391574011043, |
|
"grad_norm": 0.05869770795106888, |
|
"learning_rate": 0.000189528, |
|
"loss": 0.047, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.39785190227668105, |
|
"grad_norm": 0.09353045374155045, |
|
"learning_rate": 0.000189488, |
|
"loss": 0.0457, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.3993646471522578, |
|
"grad_norm": 0.083396315574646, |
|
"learning_rate": 0.000189448, |
|
"loss": 0.0441, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.4008773920278345, |
|
"grad_norm": 0.0698527917265892, |
|
"learning_rate": 0.000189408, |
|
"loss": 0.0469, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.40239013690341124, |
|
"grad_norm": 0.07554033398628235, |
|
"learning_rate": 0.000189368, |
|
"loss": 0.0523, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.403902881778988, |
|
"grad_norm": 0.08026187121868134, |
|
"learning_rate": 0.00018932800000000003, |
|
"loss": 0.0492, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.4054156266545647, |
|
"grad_norm": 0.0758117213845253, |
|
"learning_rate": 0.000189288, |
|
"loss": 0.0471, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.40692837153014144, |
|
"grad_norm": 0.0716470330953598, |
|
"learning_rate": 0.00018924800000000001, |
|
"loss": 0.0401, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.4084411164057182, |
|
"grad_norm": 0.07114976644515991, |
|
"learning_rate": 0.000189208, |
|
"loss": 0.0483, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4099538612812949, |
|
"grad_norm": 0.059242133051157, |
|
"learning_rate": 0.000189168, |
|
"loss": 0.0416, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.41146660615687164, |
|
"grad_norm": 0.07214327901601791, |
|
"learning_rate": 0.00018912800000000002, |
|
"loss": 0.0446, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.41297935103244837, |
|
"grad_norm": 0.0404672808945179, |
|
"learning_rate": 0.000189088, |
|
"loss": 0.0445, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.4144920959080251, |
|
"grad_norm": 0.06663410365581512, |
|
"learning_rate": 0.000189048, |
|
"loss": 0.0435, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.41600484078360184, |
|
"grad_norm": 0.0690486952662468, |
|
"learning_rate": 0.000189008, |
|
"loss": 0.048, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.41751758565917857, |
|
"grad_norm": 0.07034830003976822, |
|
"learning_rate": 0.00018896800000000002, |
|
"loss": 0.0423, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.4190303305347553, |
|
"grad_norm": 0.08420894294977188, |
|
"learning_rate": 0.00018892800000000002, |
|
"loss": 0.0525, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.42054307541033203, |
|
"grad_norm": 0.07617480307817459, |
|
"learning_rate": 0.000188888, |
|
"loss": 0.0492, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.42205582028590877, |
|
"grad_norm": 0.06841789186000824, |
|
"learning_rate": 0.000188848, |
|
"loss": 0.0427, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.4235685651614855, |
|
"grad_norm": 0.07013357430696487, |
|
"learning_rate": 0.000188808, |
|
"loss": 0.04, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4235685651614855, |
|
"eval_cer": 0.26005539454405746, |
|
"eval_loss": 0.04089580848813057, |
|
"eval_runtime": 10530.3682, |
|
"eval_samples_per_second": 1.999, |
|
"eval_steps_per_second": 0.25, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.42508131003706223, |
|
"grad_norm": 0.06432001292705536, |
|
"learning_rate": 0.00018876800000000002, |
|
"loss": 0.0402, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.42659405491263896, |
|
"grad_norm": 0.06437406688928604, |
|
"learning_rate": 0.000188728, |
|
"loss": 0.0397, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.4281067997882157, |
|
"grad_norm": 0.0579422190785408, |
|
"learning_rate": 0.000188688, |
|
"loss": 0.0431, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.42961954466379243, |
|
"grad_norm": 0.0628400593996048, |
|
"learning_rate": 0.000188648, |
|
"loss": 0.0426, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.43113228953936916, |
|
"grad_norm": 0.04976367950439453, |
|
"learning_rate": 0.000188608, |
|
"loss": 0.0448, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.4326450344149459, |
|
"grad_norm": 0.07479149103164673, |
|
"learning_rate": 0.00018856800000000001, |
|
"loss": 0.0458, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.4341577792905226, |
|
"grad_norm": 0.06853318214416504, |
|
"learning_rate": 0.000188528, |
|
"loss": 0.045, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.43567052416609936, |
|
"grad_norm": 0.08534535765647888, |
|
"learning_rate": 0.00018848800000000003, |
|
"loss": 0.044, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.43718326904167615, |
|
"grad_norm": 0.05148012563586235, |
|
"learning_rate": 0.000188448, |
|
"loss": 0.0448, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.4386960139172529, |
|
"grad_norm": 0.073714479804039, |
|
"learning_rate": 0.00018840800000000002, |
|
"loss": 0.0388, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.4402087587928296, |
|
"grad_norm": 0.06875050067901611, |
|
"learning_rate": 0.000188368, |
|
"loss": 0.0476, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.44172150366840635, |
|
"grad_norm": 0.07048488408327103, |
|
"learning_rate": 0.000188328, |
|
"loss": 0.0537, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.4432342485439831, |
|
"grad_norm": 0.06159156188368797, |
|
"learning_rate": 0.00018828800000000002, |
|
"loss": 0.0523, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.4447469934195598, |
|
"grad_norm": 0.0851297378540039, |
|
"learning_rate": 0.000188248, |
|
"loss": 0.0466, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.44625973829513654, |
|
"grad_norm": 0.07920840382575989, |
|
"learning_rate": 0.000188208, |
|
"loss": 0.0434, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.4477724831707133, |
|
"grad_norm": 0.06767392158508301, |
|
"learning_rate": 0.000188168, |
|
"loss": 0.0446, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.44928522804629, |
|
"grad_norm": 0.0621979758143425, |
|
"learning_rate": 0.00018812800000000003, |
|
"loss": 0.0514, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.45079797292186674, |
|
"grad_norm": 0.06485885381698608, |
|
"learning_rate": 0.00018808800000000002, |
|
"loss": 0.0403, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.4523107177974435, |
|
"grad_norm": 0.07618974149227142, |
|
"learning_rate": 0.000188048, |
|
"loss": 0.046, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.4538234626730202, |
|
"grad_norm": 0.050627488642930984, |
|
"learning_rate": 0.000188008, |
|
"loss": 0.04, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4538234626730202, |
|
"eval_cer": 0.027385337988253985, |
|
"eval_loss": 0.0410909466445446, |
|
"eval_runtime": 11737.0194, |
|
"eval_samples_per_second": 1.794, |
|
"eval_steps_per_second": 0.224, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.45533620754859694, |
|
"grad_norm": 0.07569224387407303, |
|
"learning_rate": 0.000187968, |
|
"loss": 0.0453, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.45684895242417367, |
|
"grad_norm": 0.06267885118722916, |
|
"learning_rate": 0.00018792800000000002, |
|
"loss": 0.0519, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.4583616972997504, |
|
"grad_norm": 0.0801217257976532, |
|
"learning_rate": 0.00018788800000000001, |
|
"loss": 0.0452, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.45987444217532714, |
|
"grad_norm": 0.06966337561607361, |
|
"learning_rate": 0.000187848, |
|
"loss": 0.0459, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.46138718705090387, |
|
"grad_norm": 0.05708028003573418, |
|
"learning_rate": 0.000187808, |
|
"loss": 0.0462, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.4628999319264806, |
|
"grad_norm": 0.06033516675233841, |
|
"learning_rate": 0.00018776800000000002, |
|
"loss": 0.0459, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.46441267680205733, |
|
"grad_norm": 0.06908197700977325, |
|
"learning_rate": 0.00018772800000000002, |
|
"loss": 0.048, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.46592542167763407, |
|
"grad_norm": 0.0723978653550148, |
|
"learning_rate": 0.000187688, |
|
"loss": 0.047, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.4674381665532108, |
|
"grad_norm": 0.06268727034330368, |
|
"learning_rate": 0.000187648, |
|
"loss": 0.0387, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.46895091142878753, |
|
"grad_norm": 0.06796183437108994, |
|
"learning_rate": 0.000187608, |
|
"loss": 0.0379, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.47046365630436426, |
|
"grad_norm": 0.08227751404047012, |
|
"learning_rate": 0.00018756800000000002, |
|
"loss": 0.0497, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.471976401179941, |
|
"grad_norm": 0.06391087174415588, |
|
"learning_rate": 0.000187528, |
|
"loss": 0.045, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.47348914605551773, |
|
"grad_norm": 0.09645809978246689, |
|
"learning_rate": 0.00018748800000000003, |
|
"loss": 0.0479, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.47500189093109446, |
|
"grad_norm": 0.07187838107347488, |
|
"learning_rate": 0.000187448, |
|
"loss": 0.0438, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.4765146358066712, |
|
"grad_norm": 0.06578271836042404, |
|
"learning_rate": 0.00018740800000000002, |
|
"loss": 0.0471, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.4780273806822479, |
|
"grad_norm": 0.06598031520843506, |
|
"learning_rate": 0.000187368, |
|
"loss": 0.0463, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.47954012555782466, |
|
"grad_norm": 0.06380560249090195, |
|
"learning_rate": 0.000187328, |
|
"loss": 0.0439, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.4810528704334014, |
|
"grad_norm": 0.05300907790660858, |
|
"learning_rate": 0.00018728800000000003, |
|
"loss": 0.0385, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.4825656153089781, |
|
"grad_norm": 0.08515879511833191, |
|
"learning_rate": 0.000187248, |
|
"loss": 0.0444, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.48407836018455486, |
|
"grad_norm": 0.0779171735048294, |
|
"learning_rate": 0.00018720800000000001, |
|
"loss": 0.0453, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.48407836018455486, |
|
"eval_cer": 0.010036246117811001, |
|
"eval_loss": 0.04116720333695412, |
|
"eval_runtime": 10575.268, |
|
"eval_samples_per_second": 1.991, |
|
"eval_steps_per_second": 0.249, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.4855911050601316, |
|
"grad_norm": 0.07719563692808151, |
|
"learning_rate": 0.000187168, |
|
"loss": 0.0516, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.4871038499357083, |
|
"grad_norm": 0.0623527429997921, |
|
"learning_rate": 0.000187128, |
|
"loss": 0.0412, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.48861659481128505, |
|
"grad_norm": 0.05286158621311188, |
|
"learning_rate": 0.00018708800000000002, |
|
"loss": 0.0433, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.4901293396868618, |
|
"grad_norm": 0.05317120626568794, |
|
"learning_rate": 0.000187048, |
|
"loss": 0.0451, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.4916420845624385, |
|
"grad_norm": 0.06447257846593857, |
|
"learning_rate": 0.000187008, |
|
"loss": 0.0552, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.49315482943801525, |
|
"grad_norm": 0.05432993173599243, |
|
"learning_rate": 0.000186968, |
|
"loss": 0.0454, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.49466757431359204, |
|
"grad_norm": 0.07853369414806366, |
|
"learning_rate": 0.00018692800000000002, |
|
"loss": 0.0513, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.4961803191891688, |
|
"grad_norm": 0.07532196491956711, |
|
"learning_rate": 0.00018688800000000002, |
|
"loss": 0.0494, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.4976930640647455, |
|
"grad_norm": 0.0591423436999321, |
|
"learning_rate": 0.000186848, |
|
"loss": 0.0406, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.49920580894032224, |
|
"grad_norm": 0.05588558688759804, |
|
"learning_rate": 0.000186808, |
|
"loss": 0.0454, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5007185538158989, |
|
"grad_norm": 0.06208329647779465, |
|
"learning_rate": 0.000186768, |
|
"loss": 0.0379, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.5022312986914756, |
|
"grad_norm": 0.09954684972763062, |
|
"learning_rate": 0.00018672800000000002, |
|
"loss": 0.0441, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.5037440435670524, |
|
"grad_norm": 0.06522241979837418, |
|
"learning_rate": 0.000186688, |
|
"loss": 0.0435, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.5052567884426291, |
|
"grad_norm": 0.06771814823150635, |
|
"learning_rate": 0.000186648, |
|
"loss": 0.0407, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.5067695333182058, |
|
"grad_norm": 0.09186646342277527, |
|
"learning_rate": 0.000186608, |
|
"loss": 0.0468, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.5082822781937826, |
|
"grad_norm": 0.05741488188505173, |
|
"learning_rate": 0.00018656800000000002, |
|
"loss": 0.0427, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5097950230693593, |
|
"grad_norm": 0.078957200050354, |
|
"learning_rate": 0.00018652800000000001, |
|
"loss": 0.0524, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.511307767944936, |
|
"grad_norm": 0.06480754166841507, |
|
"learning_rate": 0.000186488, |
|
"loss": 0.0491, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 0.07016266882419586, |
|
"learning_rate": 0.000186448, |
|
"loss": 0.0455, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.5143332576960895, |
|
"grad_norm": 0.09549427777528763, |
|
"learning_rate": 0.000186408, |
|
"loss": 0.0435, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5143332576960895, |
|
"eval_cer": 0.06014582453123417, |
|
"eval_loss": 0.040756821632385254, |
|
"eval_runtime": 10458.365, |
|
"eval_samples_per_second": 2.013, |
|
"eval_steps_per_second": 0.252, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5158460025716662, |
|
"grad_norm": 0.06771855056285858, |
|
"learning_rate": 0.00018636800000000002, |
|
"loss": 0.0496, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.517358747447243, |
|
"grad_norm": 0.051270436495542526, |
|
"learning_rate": 0.000186328, |
|
"loss": 0.0376, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.5188714923228197, |
|
"grad_norm": 0.05424557998776436, |
|
"learning_rate": 0.00018628800000000003, |
|
"loss": 0.0455, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.5203842371983964, |
|
"grad_norm": 0.07000952959060669, |
|
"learning_rate": 0.000186248, |
|
"loss": 0.0494, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.5218969820739732, |
|
"grad_norm": 0.06696450710296631, |
|
"learning_rate": 0.00018620800000000002, |
|
"loss": 0.0449, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.52340972694955, |
|
"grad_norm": 0.07243742048740387, |
|
"learning_rate": 0.000186168, |
|
"loss": 0.0481, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.5249224718251267, |
|
"grad_norm": 0.07457748800516129, |
|
"learning_rate": 0.000186128, |
|
"loss": 0.0413, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.5264352167007035, |
|
"grad_norm": 0.05373325198888779, |
|
"learning_rate": 0.00018608800000000002, |
|
"loss": 0.046, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.5279479615762802, |
|
"grad_norm": 0.07769589871168137, |
|
"learning_rate": 0.000186048, |
|
"loss": 0.0443, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.5294607064518569, |
|
"grad_norm": 0.05949350818991661, |
|
"learning_rate": 0.000186008, |
|
"loss": 0.0426, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 0.08557622879743576, |
|
"learning_rate": 0.000185968, |
|
"loss": 0.0436, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.5324861962030104, |
|
"grad_norm": 0.07504332065582275, |
|
"learning_rate": 0.00018592800000000003, |
|
"loss": 0.045, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5339989410785871, |
|
"grad_norm": 0.08510497957468033, |
|
"learning_rate": 0.00018588800000000002, |
|
"loss": 0.0451, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.5355116859541639, |
|
"grad_norm": 0.06645802408456802, |
|
"learning_rate": 0.000185848, |
|
"loss": 0.0459, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.5370244308297406, |
|
"grad_norm": 0.05905970185995102, |
|
"learning_rate": 0.000185808, |
|
"loss": 0.0431, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.5385371757053173, |
|
"grad_norm": 0.059341125190258026, |
|
"learning_rate": 0.000185768, |
|
"loss": 0.0521, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.5400499205808941, |
|
"grad_norm": 0.07676515728235245, |
|
"learning_rate": 0.00018572800000000002, |
|
"loss": 0.0446, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.5415626654564708, |
|
"grad_norm": 0.05860384181141853, |
|
"learning_rate": 0.00018568800000000002, |
|
"loss": 0.041, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.5430754103320475, |
|
"grad_norm": 0.07133147865533829, |
|
"learning_rate": 0.000185648, |
|
"loss": 0.0479, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.5445881552076243, |
|
"grad_norm": 0.058478474617004395, |
|
"learning_rate": 0.000185608, |
|
"loss": 0.0447, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5445881552076243, |
|
"eval_cer": 0.16368877753976077, |
|
"eval_loss": 0.04047335311770439, |
|
"eval_runtime": 10446.0422, |
|
"eval_samples_per_second": 2.015, |
|
"eval_steps_per_second": 0.252, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.546100900083201, |
|
"grad_norm": 0.06725309789180756, |
|
"learning_rate": 0.000185568, |
|
"loss": 0.053, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.5476136449587777, |
|
"grad_norm": 0.06334862858057022, |
|
"learning_rate": 0.00018552800000000002, |
|
"loss": 0.0451, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.5491263898343545, |
|
"grad_norm": 0.12283937633037567, |
|
"learning_rate": 0.000185488, |
|
"loss": 0.0437, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.5506391347099312, |
|
"grad_norm": 0.05931037664413452, |
|
"learning_rate": 0.000185448, |
|
"loss": 0.0431, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.5521518795855079, |
|
"grad_norm": 0.05501909554004669, |
|
"learning_rate": 0.000185408, |
|
"loss": 0.0398, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.5536646244610847, |
|
"grad_norm": 0.06066635251045227, |
|
"learning_rate": 0.00018536800000000002, |
|
"loss": 0.0497, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.5551773693366614, |
|
"grad_norm": 0.1352480947971344, |
|
"learning_rate": 0.000185328, |
|
"loss": 0.0445, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.5566901142122381, |
|
"grad_norm": 0.08712221682071686, |
|
"learning_rate": 0.000185288, |
|
"loss": 0.0485, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.5582028590878149, |
|
"grad_norm": 0.06511665135622025, |
|
"learning_rate": 0.000185248, |
|
"loss": 0.0464, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.5597156039633916, |
|
"grad_norm": 0.052760981023311615, |
|
"learning_rate": 0.000185208, |
|
"loss": 0.0417, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5612283488389683, |
|
"grad_norm": 0.05113260820508003, |
|
"learning_rate": 0.000185168, |
|
"loss": 0.0426, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.5627410937145451, |
|
"grad_norm": 0.06565012037754059, |
|
"learning_rate": 0.000185128, |
|
"loss": 0.0397, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.5642538385901218, |
|
"grad_norm": 0.0608823299407959, |
|
"learning_rate": 0.00018508800000000003, |
|
"loss": 0.0411, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.5657665834656985, |
|
"grad_norm": 0.0670706033706665, |
|
"learning_rate": 0.000185048, |
|
"loss": 0.0495, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.5672793283412753, |
|
"grad_norm": 0.07000606507062912, |
|
"learning_rate": 0.00018500800000000001, |
|
"loss": 0.0457, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.568792073216852, |
|
"grad_norm": 0.08072007447481155, |
|
"learning_rate": 0.000184968, |
|
"loss": 0.0484, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.5703048180924287, |
|
"grad_norm": 0.06795356422662735, |
|
"learning_rate": 0.000184928, |
|
"loss": 0.0495, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.5718175629680055, |
|
"grad_norm": 0.3031274974346161, |
|
"learning_rate": 0.00018488800000000002, |
|
"loss": 0.0504, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.5733303078435822, |
|
"grad_norm": 0.05166814848780632, |
|
"learning_rate": 0.000184848, |
|
"loss": 0.0442, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.5748430527191589, |
|
"grad_norm": 0.08816450089216232, |
|
"learning_rate": 0.000184808, |
|
"loss": 0.0525, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5748430527191589, |
|
"eval_cer": 0.09852050611143642, |
|
"eval_loss": 0.041136305779218674, |
|
"eval_runtime": 10432.1011, |
|
"eval_samples_per_second": 2.018, |
|
"eval_steps_per_second": 0.252, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5763557975947357, |
|
"grad_norm": 0.06531400233507156, |
|
"learning_rate": 0.000184768, |
|
"loss": 0.0459, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.5778685424703124, |
|
"grad_norm": 0.07049426436424255, |
|
"learning_rate": 0.00018472800000000002, |
|
"loss": 0.0386, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.5793812873458891, |
|
"grad_norm": 0.07954803854227066, |
|
"learning_rate": 0.00018468800000000002, |
|
"loss": 0.0451, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.5808940322214659, |
|
"grad_norm": 0.07543455064296722, |
|
"learning_rate": 0.000184648, |
|
"loss": 0.0406, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.5824067770970426, |
|
"grad_norm": 0.08292882144451141, |
|
"learning_rate": 0.000184608, |
|
"loss": 0.0544, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.5839195219726193, |
|
"grad_norm": 0.05814971402287483, |
|
"learning_rate": 0.000184568, |
|
"loss": 0.0441, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.585432266848196, |
|
"grad_norm": 0.06112606078386307, |
|
"learning_rate": 0.00018452800000000002, |
|
"loss": 0.0482, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.5869450117237728, |
|
"grad_norm": 0.08487452566623688, |
|
"learning_rate": 0.000184488, |
|
"loss": 0.0446, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.5884577565993495, |
|
"grad_norm": 0.05025780200958252, |
|
"learning_rate": 0.000184448, |
|
"loss": 0.0453, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.5899705014749262, |
|
"grad_norm": 0.10276935994625092, |
|
"learning_rate": 0.000184408, |
|
"loss": 0.0427, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.591483246350503, |
|
"grad_norm": 0.11926810443401337, |
|
"learning_rate": 0.000184368, |
|
"loss": 0.0472, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.5929959912260797, |
|
"grad_norm": 0.08615875244140625, |
|
"learning_rate": 0.00018432800000000001, |
|
"loss": 0.0504, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.5945087361016564, |
|
"grad_norm": 0.05418393015861511, |
|
"learning_rate": 0.000184288, |
|
"loss": 0.0397, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.5960214809772332, |
|
"grad_norm": 0.06980731338262558, |
|
"learning_rate": 0.000184248, |
|
"loss": 0.0407, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.5975342258528099, |
|
"grad_norm": 0.07121722400188446, |
|
"learning_rate": 0.000184208, |
|
"loss": 0.0441, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.5990469707283866, |
|
"grad_norm": 0.05750627815723419, |
|
"learning_rate": 0.00018416800000000002, |
|
"loss": 0.049, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.6005597156039634, |
|
"grad_norm": 0.08207126706838608, |
|
"learning_rate": 0.000184128, |
|
"loss": 0.0475, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.6020724604795401, |
|
"grad_norm": 0.07319646328687668, |
|
"learning_rate": 0.000184088, |
|
"loss": 0.0517, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.6035852053551168, |
|
"grad_norm": 0.06762152910232544, |
|
"learning_rate": 0.000184048, |
|
"loss": 0.042, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.6050979502306936, |
|
"grad_norm": 0.05603775382041931, |
|
"learning_rate": 0.000184008, |
|
"loss": 0.0434, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6050979502306936, |
|
"eval_cer": 0.2283245991802003, |
|
"eval_loss": 0.03986261412501335, |
|
"eval_runtime": 10464.7689, |
|
"eval_samples_per_second": 2.012, |
|
"eval_steps_per_second": 0.252, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6066106951062703, |
|
"grad_norm": 0.05094938725233078, |
|
"learning_rate": 0.000183968, |
|
"loss": 0.0493, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.608123439981847, |
|
"grad_norm": 0.08996951580047607, |
|
"learning_rate": 0.000183928, |
|
"loss": 0.0475, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.6096361848574238, |
|
"grad_norm": 0.07369961589574814, |
|
"learning_rate": 0.00018388800000000003, |
|
"loss": 0.0441, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.6111489297330005, |
|
"grad_norm": 0.06135983020067215, |
|
"learning_rate": 0.000183848, |
|
"loss": 0.0421, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.6126616746085772, |
|
"grad_norm": 0.04601254314184189, |
|
"learning_rate": 0.000183808, |
|
"loss": 0.037, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.614174419484154, |
|
"grad_norm": 0.04949349910020828, |
|
"learning_rate": 0.000183768, |
|
"loss": 0.0424, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.6156871643597307, |
|
"grad_norm": 0.08714490383863449, |
|
"learning_rate": 0.000183728, |
|
"loss": 0.0459, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.6171999092353074, |
|
"grad_norm": 0.07733121514320374, |
|
"learning_rate": 0.00018368800000000002, |
|
"loss": 0.0423, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.6187126541108842, |
|
"grad_norm": 0.070652537047863, |
|
"learning_rate": 0.000183648, |
|
"loss": 0.0417, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.6202253989864609, |
|
"grad_norm": 0.08538975566625595, |
|
"learning_rate": 0.000183608, |
|
"loss": 0.045, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.6217381438620376, |
|
"grad_norm": 0.07866961508989334, |
|
"learning_rate": 0.000183568, |
|
"loss": 0.0435, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.6232508887376144, |
|
"grad_norm": 0.052214980125427246, |
|
"learning_rate": 0.00018352800000000002, |
|
"loss": 0.0389, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.6247636336131911, |
|
"grad_norm": 0.07548975199460983, |
|
"learning_rate": 0.00018348800000000002, |
|
"loss": 0.0406, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.6262763784887678, |
|
"grad_norm": 0.06064745783805847, |
|
"learning_rate": 0.000183448, |
|
"loss": 0.0405, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.6277891233643446, |
|
"grad_norm": 0.06255548447370529, |
|
"learning_rate": 0.000183408, |
|
"loss": 0.0426, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.6293018682399213, |
|
"grad_norm": 0.05550558492541313, |
|
"learning_rate": 0.000183368, |
|
"loss": 0.0432, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.630814613115498, |
|
"grad_norm": 0.06224781274795532, |
|
"learning_rate": 0.00018332800000000002, |
|
"loss": 0.0489, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.6323273579910748, |
|
"grad_norm": 0.04567689448595047, |
|
"learning_rate": 0.000183288, |
|
"loss": 0.0392, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.6338401028666515, |
|
"grad_norm": 0.08686509728431702, |
|
"learning_rate": 0.00018324800000000003, |
|
"loss": 0.0503, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.6353528477422282, |
|
"grad_norm": 0.039897847920656204, |
|
"learning_rate": 0.000183208, |
|
"loss": 0.0437, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6353528477422282, |
|
"eval_cer": 0.0028697931722888917, |
|
"eval_loss": 0.03980256989598274, |
|
"eval_runtime": 10439.5254, |
|
"eval_samples_per_second": 2.017, |
|
"eval_steps_per_second": 0.252, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.636865592617805, |
|
"grad_norm": 0.07222657650709152, |
|
"learning_rate": 0.00018316800000000002, |
|
"loss": 0.0445, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.6383783374933817, |
|
"grad_norm": 0.06796406954526901, |
|
"learning_rate": 0.000183128, |
|
"loss": 0.0452, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.6398910823689585, |
|
"grad_norm": 0.07380914688110352, |
|
"learning_rate": 0.000183088, |
|
"loss": 0.0456, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.6414038272445353, |
|
"grad_norm": 0.05780802294611931, |
|
"learning_rate": 0.00018304800000000003, |
|
"loss": 0.043, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.642916572120112, |
|
"grad_norm": 0.07155787944793701, |
|
"learning_rate": 0.000183008, |
|
"loss": 0.0422, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.6444293169956887, |
|
"grad_norm": 0.06419336050748825, |
|
"learning_rate": 0.00018296800000000001, |
|
"loss": 0.0453, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.6459420618712655, |
|
"grad_norm": 0.06702402234077454, |
|
"learning_rate": 0.000182928, |
|
"loss": 0.0416, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.6474548067468422, |
|
"grad_norm": 0.062247395515441895, |
|
"learning_rate": 0.00018288800000000003, |
|
"loss": 0.0431, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.6489675516224189, |
|
"grad_norm": 0.05556045100092888, |
|
"learning_rate": 0.00018284800000000002, |
|
"loss": 0.0542, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.6504802964979957, |
|
"grad_norm": 0.07586701959371567, |
|
"learning_rate": 0.000182808, |
|
"loss": 0.0476, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.6519930413735724, |
|
"grad_norm": 0.056563302874565125, |
|
"learning_rate": 0.000182768, |
|
"loss": 0.0441, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.6535057862491491, |
|
"grad_norm": 0.08210831135511398, |
|
"learning_rate": 0.000182728, |
|
"loss": 0.0428, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.6550185311247259, |
|
"grad_norm": 0.06154036149382591, |
|
"learning_rate": 0.00018268800000000002, |
|
"loss": 0.0437, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.6565312760003026, |
|
"grad_norm": 0.06387040764093399, |
|
"learning_rate": 0.00018264800000000002, |
|
"loss": 0.0503, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.6580440208758793, |
|
"grad_norm": 0.07460694015026093, |
|
"learning_rate": 0.000182608, |
|
"loss": 0.0388, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.659556765751456, |
|
"grad_norm": 0.05871427804231644, |
|
"learning_rate": 0.000182568, |
|
"loss": 0.0409, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.6610695106270328, |
|
"grad_norm": 0.05525946244597435, |
|
"learning_rate": 0.000182528, |
|
"loss": 0.0403, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.6625822555026095, |
|
"grad_norm": 0.07400190085172653, |
|
"learning_rate": 0.00018248800000000002, |
|
"loss": 0.0544, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.6640950003781863, |
|
"grad_norm": 0.05236358568072319, |
|
"learning_rate": 0.000182448, |
|
"loss": 0.0424, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.665607745253763, |
|
"grad_norm": 0.07223962247371674, |
|
"learning_rate": 0.000182408, |
|
"loss": 0.0427, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.665607745253763, |
|
"eval_cer": 0.22895526186399429, |
|
"eval_loss": 0.039881668984889984, |
|
"eval_runtime": 10486.5948, |
|
"eval_samples_per_second": 2.008, |
|
"eval_steps_per_second": 0.251, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.6671204901293397, |
|
"grad_norm": 0.04777299240231514, |
|
"learning_rate": 0.000182368, |
|
"loss": 0.0365, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.6686332350049164, |
|
"grad_norm": 0.06789238750934601, |
|
"learning_rate": 0.00018232800000000002, |
|
"loss": 0.041, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.6701459798804932, |
|
"grad_norm": 0.07556366920471191, |
|
"learning_rate": 0.00018228800000000001, |
|
"loss": 0.0454, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.6716587247560699, |
|
"grad_norm": 0.05699057877063751, |
|
"learning_rate": 0.000182248, |
|
"loss": 0.0412, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.6731714696316466, |
|
"grad_norm": 0.06115678697824478, |
|
"learning_rate": 0.000182208, |
|
"loss": 0.0494, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.6746842145072234, |
|
"grad_norm": 0.16907750070095062, |
|
"learning_rate": 0.000182168, |
|
"loss": 0.0457, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.6761969593828001, |
|
"grad_norm": 0.23710806667804718, |
|
"learning_rate": 0.00018212800000000002, |
|
"loss": 0.0491, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.6777097042583768, |
|
"grad_norm": 0.13006287813186646, |
|
"learning_rate": 0.000182088, |
|
"loss": 0.0528, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.6792224491339536, |
|
"grad_norm": 0.24661995470523834, |
|
"learning_rate": 0.00018204800000000003, |
|
"loss": 0.043, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.6807351940095303, |
|
"grad_norm": 0.2757125198841095, |
|
"learning_rate": 0.000182008, |
|
"loss": 0.0477, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.682247938885107, |
|
"grad_norm": 0.27585530281066895, |
|
"learning_rate": 0.00018196800000000002, |
|
"loss": 0.0486, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 0.10548703372478485, |
|
"learning_rate": 0.000181928, |
|
"loss": 0.0448, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.6852734286362605, |
|
"grad_norm": 0.1989259272813797, |
|
"learning_rate": 0.000181888, |
|
"loss": 0.0508, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.6867861735118372, |
|
"grad_norm": 0.10586623847484589, |
|
"learning_rate": 0.00018184800000000002, |
|
"loss": 0.0486, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.688298918387414, |
|
"grad_norm": 0.09687965363264084, |
|
"learning_rate": 0.000181808, |
|
"loss": 0.0463, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.6898116632629907, |
|
"grad_norm": 0.13362692296504974, |
|
"learning_rate": 0.000181768, |
|
"loss": 0.0441, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.6913244081385674, |
|
"grad_norm": 0.07124081254005432, |
|
"learning_rate": 0.000181728, |
|
"loss": 0.0479, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.6928371530141442, |
|
"grad_norm": 0.060886889696121216, |
|
"learning_rate": 0.00018168800000000003, |
|
"loss": 0.0425, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.6943498978897209, |
|
"grad_norm": 0.09697773307561874, |
|
"learning_rate": 0.00018164800000000002, |
|
"loss": 0.0466, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.6958626427652976, |
|
"grad_norm": 0.09655246883630753, |
|
"learning_rate": 0.00018160800000000001, |
|
"loss": 0.0423, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6958626427652976, |
|
"eval_cer": 0.3264485475609846, |
|
"eval_loss": 0.04431215673685074, |
|
"eval_runtime": 9966.6677, |
|
"eval_samples_per_second": 2.112, |
|
"eval_steps_per_second": 0.264, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6973753876408744, |
|
"grad_norm": 0.6920335292816162, |
|
"learning_rate": 0.000181568, |
|
"loss": 0.0612, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.6988881325164511, |
|
"grad_norm": 21.773630142211914, |
|
"learning_rate": 0.00018153600000000002, |
|
"loss": 0.3452, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.7004008773920278, |
|
"grad_norm": 0.6047945022583008, |
|
"learning_rate": 0.0001815, |
|
"loss": 0.8043, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.7019136222676046, |
|
"grad_norm": 0.30588680505752563, |
|
"learning_rate": 0.00018146000000000001, |
|
"loss": 0.094, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.7034263671431813, |
|
"grad_norm": 2.5436811447143555, |
|
"learning_rate": 0.00018142, |
|
"loss": 0.1421, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.704939112018758, |
|
"grad_norm": 3.3921713829040527, |
|
"learning_rate": 0.00018138000000000003, |
|
"loss": 0.2285, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.7064518568943348, |
|
"grad_norm": 6.751514434814453, |
|
"learning_rate": 0.00018134, |
|
"loss": 0.1609, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 0.2919982075691223, |
|
"learning_rate": 0.00018130000000000002, |
|
"loss": 0.0731, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.7094773466454882, |
|
"grad_norm": 0.2757503092288971, |
|
"learning_rate": 0.00018126, |
|
"loss": 0.0553, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.710990091521065, |
|
"grad_norm": 0.12121643126010895, |
|
"learning_rate": 0.00018122, |
|
"loss": 0.0637, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.7125028363966417, |
|
"grad_norm": 0.6880851984024048, |
|
"learning_rate": 0.00018118000000000002, |
|
"loss": 0.0556, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.7140155812722184, |
|
"grad_norm": 0.17397326231002808, |
|
"learning_rate": 0.00018114, |
|
"loss": 0.0619, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.7155283261477952, |
|
"grad_norm": 0.4361652433872223, |
|
"learning_rate": 0.0001811, |
|
"loss": 0.052, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.7170410710233719, |
|
"grad_norm": 0.08802498877048492, |
|
"learning_rate": 0.00018106, |
|
"loss": 0.0531, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.7185538158989486, |
|
"grad_norm": 0.16508696973323822, |
|
"learning_rate": 0.00018102000000000003, |
|
"loss": 0.0519, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.7200665607745254, |
|
"grad_norm": 0.1359723061323166, |
|
"learning_rate": 0.00018098000000000002, |
|
"loss": 0.0559, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.7215793056501021, |
|
"grad_norm": 0.12716355919837952, |
|
"learning_rate": 0.00018093999999999999, |
|
"loss": 0.0478, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.7230920505256788, |
|
"grad_norm": 0.24563723802566528, |
|
"learning_rate": 0.0001809, |
|
"loss": 0.0508, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.7246047954012556, |
|
"grad_norm": 0.15526343882083893, |
|
"learning_rate": 0.00018086, |
|
"loss": 0.053, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.7261175402768323, |
|
"grad_norm": 0.39961257576942444, |
|
"learning_rate": 0.00018082000000000002, |
|
"loss": 0.0543, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7261175402768323, |
|
"eval_cer": 0.8969592299120654, |
|
"eval_loss": 0.04724743589758873, |
|
"eval_runtime": 9508.4862, |
|
"eval_samples_per_second": 2.214, |
|
"eval_steps_per_second": 0.277, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.727630285152409, |
|
"grad_norm": 0.11674599349498749, |
|
"learning_rate": 0.00018078000000000001, |
|
"loss": 0.045, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.7291430300279858, |
|
"grad_norm": 0.12775878608226776, |
|
"learning_rate": 0.00018074, |
|
"loss": 0.0507, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.7306557749035625, |
|
"grad_norm": 0.21720856428146362, |
|
"learning_rate": 0.0001807, |
|
"loss": 0.0507, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.7321685197791392, |
|
"grad_norm": 0.09953787177801132, |
|
"learning_rate": 0.00018066, |
|
"loss": 0.0455, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.733681264654716, |
|
"grad_norm": 0.1652969866991043, |
|
"learning_rate": 0.00018062000000000002, |
|
"loss": 0.058, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.7351940095302927, |
|
"grad_norm": 0.15136420726776123, |
|
"learning_rate": 0.00018058, |
|
"loss": 0.0403, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.7367067544058694, |
|
"grad_norm": 0.09294873476028442, |
|
"learning_rate": 0.00018054, |
|
"loss": 0.0454, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.7382194992814461, |
|
"grad_norm": 0.06313528120517731, |
|
"learning_rate": 0.0001805, |
|
"loss": 0.0486, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.7397322441570229, |
|
"grad_norm": 0.10854914039373398, |
|
"learning_rate": 0.00018046000000000002, |
|
"loss": 0.0419, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.7412449890325996, |
|
"grad_norm": 0.08302963525056839, |
|
"learning_rate": 0.00018042, |
|
"loss": 0.0447, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.7427577339081763, |
|
"grad_norm": 0.0761631429195404, |
|
"learning_rate": 0.00018038, |
|
"loss": 0.0446, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.7442704787837531, |
|
"grad_norm": 0.10130470246076584, |
|
"learning_rate": 0.00018034, |
|
"loss": 0.045, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.7457832236593298, |
|
"grad_norm": 0.18436622619628906, |
|
"learning_rate": 0.0001803, |
|
"loss": 0.0429, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.7472959685349065, |
|
"grad_norm": 0.08756496757268906, |
|
"learning_rate": 0.00018026, |
|
"loss": 0.0444, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.7488087134104833, |
|
"grad_norm": 0.0750514343380928, |
|
"learning_rate": 0.00018022, |
|
"loss": 0.0507, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.75032145828606, |
|
"grad_norm": 0.07460404187440872, |
|
"learning_rate": 0.00018018000000000003, |
|
"loss": 0.0397, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.7518342031616367, |
|
"grad_norm": 0.12696300446987152, |
|
"learning_rate": 0.00018014, |
|
"loss": 0.0412, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.7533469480372135, |
|
"grad_norm": 0.09411120414733887, |
|
"learning_rate": 0.00018010000000000001, |
|
"loss": 0.0431, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.7548596929127902, |
|
"grad_norm": 0.08611701428890228, |
|
"learning_rate": 0.00018006, |
|
"loss": 0.041, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.756372437788367, |
|
"grad_norm": 0.07411106675863266, |
|
"learning_rate": 0.00018002, |
|
"loss": 0.0448, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.756372437788367, |
|
"eval_cer": 0.9283299113242558, |
|
"eval_loss": 0.0398402214050293, |
|
"eval_runtime": 9972.2961, |
|
"eval_samples_per_second": 2.111, |
|
"eval_steps_per_second": 0.264, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7578851826639438, |
|
"grad_norm": 0.06552145630121231, |
|
"learning_rate": 0.00017998000000000002, |
|
"loss": 0.0411, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.7593979275395205, |
|
"grad_norm": 0.14544987678527832, |
|
"learning_rate": 0.00017994000000000002, |
|
"loss": 0.0401, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.7609106724150972, |
|
"grad_norm": 0.06693132221698761, |
|
"learning_rate": 0.0001799, |
|
"loss": 0.045, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.762423417290674, |
|
"grad_norm": 0.08100226521492004, |
|
"learning_rate": 0.00017986, |
|
"loss": 0.0478, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.7639361621662507, |
|
"grad_norm": 0.10020666569471359, |
|
"learning_rate": 0.00017982000000000002, |
|
"loss": 0.0484, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.7654489070418274, |
|
"grad_norm": 0.055785536766052246, |
|
"learning_rate": 0.00017978000000000002, |
|
"loss": 0.0423, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.7669616519174042, |
|
"grad_norm": 0.08791428059339523, |
|
"learning_rate": 0.00017974, |
|
"loss": 0.0433, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.7684743967929809, |
|
"grad_norm": 0.10156507045030594, |
|
"learning_rate": 0.0001797, |
|
"loss": 0.0447, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.7699871416685576, |
|
"grad_norm": 0.1160702183842659, |
|
"learning_rate": 0.00017966, |
|
"loss": 0.0388, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.7714998865441344, |
|
"grad_norm": 0.08716849237680435, |
|
"learning_rate": 0.00017962000000000002, |
|
"loss": 0.0492, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.7730126314197111, |
|
"grad_norm": 0.046968474984169006, |
|
"learning_rate": 0.00017958, |
|
"loss": 0.0434, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.7745253762952878, |
|
"grad_norm": 0.06234806030988693, |
|
"learning_rate": 0.00017954000000000003, |
|
"loss": 0.0504, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.7760381211708646, |
|
"grad_norm": 0.102174311876297, |
|
"learning_rate": 0.0001795, |
|
"loss": 0.044, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.7775508660464413, |
|
"grad_norm": 0.0620570033788681, |
|
"learning_rate": 0.00017946, |
|
"loss": 0.0386, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.779063610922018, |
|
"grad_norm": 0.057656314224004745, |
|
"learning_rate": 0.00017942, |
|
"loss": 0.043, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.7805763557975948, |
|
"grad_norm": 0.08451346307992935, |
|
"learning_rate": 0.00017938, |
|
"loss": 0.0452, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.7820891006731715, |
|
"grad_norm": 0.09557165950536728, |
|
"learning_rate": 0.00017934000000000003, |
|
"loss": 0.0437, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.7836018455487482, |
|
"grad_norm": 0.12275496870279312, |
|
"learning_rate": 0.0001793, |
|
"loss": 0.0427, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.785114590424325, |
|
"grad_norm": 0.3277435600757599, |
|
"learning_rate": 0.00017926000000000002, |
|
"loss": 0.045, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.7866273352999017, |
|
"grad_norm": 0.12806734442710876, |
|
"learning_rate": 0.00017922, |
|
"loss": 0.0383, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.7866273352999017, |
|
"eval_cer": 0.8426215554451947, |
|
"eval_loss": 0.03898792341351509, |
|
"eval_runtime": 10404.4584, |
|
"eval_samples_per_second": 2.023, |
|
"eval_steps_per_second": 0.253, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.7881400801754784, |
|
"grad_norm": 0.07969816774129868, |
|
"learning_rate": 0.00017918, |
|
"loss": 0.0474, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.7896528250510552, |
|
"grad_norm": 0.20492368936538696, |
|
"learning_rate": 0.00017914000000000002, |
|
"loss": 0.0423, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.7911655699266319, |
|
"grad_norm": 0.0960281640291214, |
|
"learning_rate": 0.0001791, |
|
"loss": 0.0392, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.7926783148022086, |
|
"grad_norm": 0.16566351056098938, |
|
"learning_rate": 0.00017906, |
|
"loss": 0.0415, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.7941910596777854, |
|
"grad_norm": 0.12343327701091766, |
|
"learning_rate": 0.00017902, |
|
"loss": 0.0439, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.7957038045533621, |
|
"grad_norm": 0.0732201486825943, |
|
"learning_rate": 0.00017898000000000002, |
|
"loss": 0.0462, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.7972165494289388, |
|
"grad_norm": 0.07991164177656174, |
|
"learning_rate": 0.00017894000000000002, |
|
"loss": 0.0412, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.7987292943045156, |
|
"grad_norm": 0.07868771255016327, |
|
"learning_rate": 0.0001789, |
|
"loss": 0.0458, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.8002420391800923, |
|
"grad_norm": 0.07392987608909607, |
|
"learning_rate": 0.00017886, |
|
"loss": 0.0489, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.801754784055669, |
|
"grad_norm": 0.08330372720956802, |
|
"learning_rate": 0.00017882, |
|
"loss": 0.0448, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.8032675289312458, |
|
"grad_norm": 0.06118497997522354, |
|
"learning_rate": 0.00017878000000000002, |
|
"loss": 0.0406, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.8047802738068225, |
|
"grad_norm": 0.14288772642612457, |
|
"learning_rate": 0.00017874, |
|
"loss": 0.0439, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.8062930186823992, |
|
"grad_norm": 0.06868502497673035, |
|
"learning_rate": 0.0001787, |
|
"loss": 0.0439, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.807805763557976, |
|
"grad_norm": 0.08165542781352997, |
|
"learning_rate": 0.00017866, |
|
"loss": 0.0449, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.8093185084335527, |
|
"grad_norm": 0.08748511224985123, |
|
"learning_rate": 0.00017862000000000002, |
|
"loss": 0.0455, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.8108312533091294, |
|
"grad_norm": 0.0799604058265686, |
|
"learning_rate": 0.00017858000000000001, |
|
"loss": 0.0466, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.8123439981847062, |
|
"grad_norm": 0.09606848657131195, |
|
"learning_rate": 0.00017854, |
|
"loss": 0.0452, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.8138567430602829, |
|
"grad_norm": 0.07232715934515, |
|
"learning_rate": 0.0001785, |
|
"loss": 0.0426, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.8153694879358596, |
|
"grad_norm": 0.07278240472078323, |
|
"learning_rate": 0.00017846, |
|
"loss": 0.0468, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.8168822328114363, |
|
"grad_norm": 0.06568820029497147, |
|
"learning_rate": 0.00017842000000000002, |
|
"loss": 0.0407, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.8168822328114363, |
|
"eval_cer": 0.9304918304165957, |
|
"eval_loss": 0.039248276501894, |
|
"eval_runtime": 10433.9841, |
|
"eval_samples_per_second": 2.018, |
|
"eval_steps_per_second": 0.252, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.8183949776870131, |
|
"grad_norm": 0.08667409420013428, |
|
"learning_rate": 0.00017838, |
|
"loss": 0.0504, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.8199077225625898, |
|
"grad_norm": 0.0701778307557106, |
|
"learning_rate": 0.00017834000000000003, |
|
"loss": 0.0425, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.8214204674381665, |
|
"grad_norm": 0.07078663259744644, |
|
"learning_rate": 0.0001783, |
|
"loss": 0.0456, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.8229332123137433, |
|
"grad_norm": 0.08540530502796173, |
|
"learning_rate": 0.00017826000000000002, |
|
"loss": 0.0437, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.82444595718932, |
|
"grad_norm": 0.044258490204811096, |
|
"learning_rate": 0.00017822, |
|
"loss": 0.0373, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.8259587020648967, |
|
"grad_norm": 0.08837467432022095, |
|
"learning_rate": 0.00017818, |
|
"loss": 0.0418, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.8274714469404735, |
|
"grad_norm": 0.06399261206388474, |
|
"learning_rate": 0.00017814000000000003, |
|
"loss": 0.0461, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.8289841918160502, |
|
"grad_norm": 0.07160426676273346, |
|
"learning_rate": 0.0001781, |
|
"loss": 0.0384, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.8304969366916269, |
|
"grad_norm": 0.06335125118494034, |
|
"learning_rate": 0.00017806, |
|
"loss": 0.04, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.8320096815672037, |
|
"grad_norm": 0.10239727795124054, |
|
"learning_rate": 0.00017802, |
|
"loss": 0.0396, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8335224264427804, |
|
"grad_norm": 0.06797724217176437, |
|
"learning_rate": 0.00017798, |
|
"loss": 0.0406, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.8350351713183571, |
|
"grad_norm": 0.08448281139135361, |
|
"learning_rate": 0.00017794000000000002, |
|
"loss": 0.0489, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.8365479161939339, |
|
"grad_norm": 0.0817868560552597, |
|
"learning_rate": 0.0001779, |
|
"loss": 0.0437, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.8380606610695106, |
|
"grad_norm": 0.12232506275177002, |
|
"learning_rate": 0.00017786, |
|
"loss": 0.0475, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.8395734059450873, |
|
"grad_norm": 0.0839553102850914, |
|
"learning_rate": 0.00017782, |
|
"loss": 0.0447, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.8410861508206641, |
|
"grad_norm": 0.07315023243427277, |
|
"learning_rate": 0.00017778000000000002, |
|
"loss": 0.0441, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.8425988956962408, |
|
"grad_norm": 0.07943390309810638, |
|
"learning_rate": 0.00017774000000000002, |
|
"loss": 0.0457, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.8441116405718175, |
|
"grad_norm": 0.07185439020395279, |
|
"learning_rate": 0.0001777, |
|
"loss": 0.0429, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.8456243854473943, |
|
"grad_norm": 0.06304585933685303, |
|
"learning_rate": 0.00017766, |
|
"loss": 0.046, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.847137130322971, |
|
"grad_norm": 0.07005342841148376, |
|
"learning_rate": 0.00017762, |
|
"loss": 0.0359, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.847137130322971, |
|
"eval_cer": 0.5003496132017898, |
|
"eval_loss": 0.038213107734918594, |
|
"eval_runtime": 10454.3437, |
|
"eval_samples_per_second": 2.014, |
|
"eval_steps_per_second": 0.252, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8486498751985477, |
|
"grad_norm": 0.08005109429359436, |
|
"learning_rate": 0.00017758000000000002, |
|
"loss": 0.0491, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.8501626200741245, |
|
"grad_norm": 0.07554598152637482, |
|
"learning_rate": 0.00017754, |
|
"loss": 0.0384, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.8516753649497012, |
|
"grad_norm": 0.08396964520215988, |
|
"learning_rate": 0.0001775, |
|
"loss": 0.0439, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.8531881098252779, |
|
"grad_norm": 0.08719771355390549, |
|
"learning_rate": 0.00017746, |
|
"loss": 0.0417, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"grad_norm": 0.09563528001308441, |
|
"learning_rate": 0.00017742000000000002, |
|
"loss": 0.0456, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.8562135995764314, |
|
"grad_norm": 0.07019315659999847, |
|
"learning_rate": 0.00017738, |
|
"loss": 0.0394, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.8577263444520081, |
|
"grad_norm": 0.06756678968667984, |
|
"learning_rate": 0.00017734, |
|
"loss": 0.046, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.8592390893275849, |
|
"grad_norm": 0.06660816073417664, |
|
"learning_rate": 0.0001773, |
|
"loss": 0.0415, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.8607518342031616, |
|
"grad_norm": 0.10737419873476028, |
|
"learning_rate": 0.00017726, |
|
"loss": 0.0402, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.8622645790787383, |
|
"grad_norm": 0.06818167865276337, |
|
"learning_rate": 0.00017722000000000001, |
|
"loss": 0.039, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.8637773239543151, |
|
"grad_norm": 0.05077315866947174, |
|
"learning_rate": 0.00017718, |
|
"loss": 0.0376, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.8652900688298918, |
|
"grad_norm": 0.08248795568943024, |
|
"learning_rate": 0.00017714000000000003, |
|
"loss": 0.0427, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.8668028137054685, |
|
"grad_norm": 0.06273633241653442, |
|
"learning_rate": 0.0001771, |
|
"loss": 0.0405, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.8683155585810453, |
|
"grad_norm": 0.11920665949583054, |
|
"learning_rate": 0.00017706000000000002, |
|
"loss": 0.0416, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.869828303456622, |
|
"grad_norm": 0.061835162341594696, |
|
"learning_rate": 0.00017702, |
|
"loss": 0.0456, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.8713410483321987, |
|
"grad_norm": 0.06891065835952759, |
|
"learning_rate": 0.00017698, |
|
"loss": 0.0435, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.8728537932077756, |
|
"grad_norm": 0.06323794275522232, |
|
"learning_rate": 0.00017694000000000002, |
|
"loss": 0.0424, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.8743665380833523, |
|
"grad_norm": 0.08218410611152649, |
|
"learning_rate": 0.0001769, |
|
"loss": 0.0428, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.875879282958929, |
|
"grad_norm": 0.05943075567483902, |
|
"learning_rate": 0.00017686, |
|
"loss": 0.0373, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.8773920278345058, |
|
"grad_norm": 0.09316141158342361, |
|
"learning_rate": 0.00017682, |
|
"loss": 0.0436, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.8773920278345058, |
|
"eval_cer": 0.5988355286077488, |
|
"eval_loss": 0.0380551740527153, |
|
"eval_runtime": 10439.6932, |
|
"eval_samples_per_second": 2.017, |
|
"eval_steps_per_second": 0.252, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.8789047727100825, |
|
"grad_norm": 0.06791754812002182, |
|
"learning_rate": 0.00017678000000000003, |
|
"loss": 0.0424, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.8804175175856592, |
|
"grad_norm": 0.06572896242141724, |
|
"learning_rate": 0.00017674000000000002, |
|
"loss": 0.0446, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.881930262461236, |
|
"grad_norm": 0.07208286970853806, |
|
"learning_rate": 0.00017669999999999999, |
|
"loss": 0.0438, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.8834430073368127, |
|
"grad_norm": 0.08518756181001663, |
|
"learning_rate": 0.00017666, |
|
"loss": 0.0401, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 0.060736026614904404, |
|
"learning_rate": 0.00017662, |
|
"loss": 0.0393, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.8864684970879662, |
|
"grad_norm": 0.0627061128616333, |
|
"learning_rate": 0.00017658000000000002, |
|
"loss": 0.0358, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.8879812419635429, |
|
"grad_norm": 0.06178157031536102, |
|
"learning_rate": 0.00017654000000000001, |
|
"loss": 0.0467, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.8894939868391196, |
|
"grad_norm": 0.0688227042555809, |
|
"learning_rate": 0.0001765, |
|
"loss": 0.0415, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.8910067317146964, |
|
"grad_norm": 0.06773985177278519, |
|
"learning_rate": 0.00017646, |
|
"loss": 0.0354, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.8925194765902731, |
|
"grad_norm": 0.09130257368087769, |
|
"learning_rate": 0.00017642, |
|
"loss": 0.0414, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.8940322214658498, |
|
"grad_norm": 0.06815651059150696, |
|
"learning_rate": 0.00017638000000000002, |
|
"loss": 0.0495, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.8955449663414266, |
|
"grad_norm": 0.07239062339067459, |
|
"learning_rate": 0.00017634, |
|
"loss": 0.0459, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.8970577112170033, |
|
"grad_norm": 0.08951979130506516, |
|
"learning_rate": 0.0001763, |
|
"loss": 0.047, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.89857045609258, |
|
"grad_norm": 0.07267329841852188, |
|
"learning_rate": 0.00017626, |
|
"loss": 0.0384, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.9000832009681567, |
|
"grad_norm": 0.06272245943546295, |
|
"learning_rate": 0.00017622000000000002, |
|
"loss": 0.0373, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.9015959458437335, |
|
"grad_norm": 0.07484642416238785, |
|
"learning_rate": 0.00017618, |
|
"loss": 0.0445, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.9031086907193102, |
|
"grad_norm": 0.06894571334123611, |
|
"learning_rate": 0.00017614, |
|
"loss": 0.0418, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.904621435594887, |
|
"grad_norm": 0.07352825254201889, |
|
"learning_rate": 0.0001761, |
|
"loss": 0.0361, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.9061341804704637, |
|
"grad_norm": 0.07955580949783325, |
|
"learning_rate": 0.00017606, |
|
"loss": 0.0418, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.9076469253460404, |
|
"grad_norm": 0.057830698788166046, |
|
"learning_rate": 0.00017602, |
|
"loss": 0.0359, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9076469253460404, |
|
"eval_cer": 0.5058427407698408, |
|
"eval_loss": 0.038296379148960114, |
|
"eval_runtime": 10426.1739, |
|
"eval_samples_per_second": 2.019, |
|
"eval_steps_per_second": 0.252, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9091596702216171, |
|
"grad_norm": 0.08560307323932648, |
|
"learning_rate": 0.00017598, |
|
"loss": 0.0465, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.9106724150971939, |
|
"grad_norm": 0.06908106803894043, |
|
"learning_rate": 0.00017594000000000003, |
|
"loss": 0.0469, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.9121851599727706, |
|
"grad_norm": 0.058405641466379166, |
|
"learning_rate": 0.0001759, |
|
"loss": 0.0459, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.9136979048483473, |
|
"grad_norm": 0.06696103513240814, |
|
"learning_rate": 0.00017586000000000001, |
|
"loss": 0.0389, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.9152106497239241, |
|
"grad_norm": 0.06927672773599625, |
|
"learning_rate": 0.00017582, |
|
"loss": 0.0369, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.9167233945995008, |
|
"grad_norm": 0.11847919970750809, |
|
"learning_rate": 0.00017578, |
|
"loss": 0.0379, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.9182361394750775, |
|
"grad_norm": 0.06731213629245758, |
|
"learning_rate": 0.00017574000000000002, |
|
"loss": 0.0492, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.9197488843506543, |
|
"grad_norm": 0.06238566339015961, |
|
"learning_rate": 0.0001757, |
|
"loss": 0.0351, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.921261629226231, |
|
"grad_norm": 0.07023432850837708, |
|
"learning_rate": 0.00017566, |
|
"loss": 0.0418, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.9227743741018077, |
|
"grad_norm": 0.07269687950611115, |
|
"learning_rate": 0.00017562, |
|
"loss": 0.0473, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9242871189773845, |
|
"grad_norm": 0.0714830756187439, |
|
"learning_rate": 0.00017558000000000002, |
|
"loss": 0.0419, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.9257998638529612, |
|
"grad_norm": 0.06455916166305542, |
|
"learning_rate": 0.00017554000000000002, |
|
"loss": 0.0386, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.9273126087285379, |
|
"grad_norm": 0.0797223374247551, |
|
"learning_rate": 0.0001755, |
|
"loss": 0.0425, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.9288253536041147, |
|
"grad_norm": 0.08360251039266586, |
|
"learning_rate": 0.00017546, |
|
"loss": 0.0414, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.9303380984796914, |
|
"grad_norm": 0.06491956114768982, |
|
"learning_rate": 0.00017542, |
|
"loss": 0.0367, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.9318508433552681, |
|
"grad_norm": 0.06236764043569565, |
|
"learning_rate": 0.00017538000000000002, |
|
"loss": 0.0514, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.9333635882308449, |
|
"grad_norm": 0.08555632829666138, |
|
"learning_rate": 0.00017534, |
|
"loss": 0.041, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.9348763331064216, |
|
"grad_norm": 0.08949322998523712, |
|
"learning_rate": 0.0001753, |
|
"loss": 0.0462, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.9363890779819983, |
|
"grad_norm": 0.07832244038581848, |
|
"learning_rate": 0.00017526, |
|
"loss": 0.0471, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.9379018228575751, |
|
"grad_norm": 0.06077546253800392, |
|
"learning_rate": 0.00017522000000000002, |
|
"loss": 0.0457, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.9379018228575751, |
|
"eval_cer": 0.3344013213649492, |
|
"eval_loss": 0.03830147907137871, |
|
"eval_runtime": 10461.8882, |
|
"eval_samples_per_second": 2.012, |
|
"eval_steps_per_second": 0.252, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.9394145677331518, |
|
"grad_norm": 0.048287175595760345, |
|
"learning_rate": 0.00017518, |
|
"loss": 0.0393, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.9409273126087285, |
|
"grad_norm": 0.08072841167449951, |
|
"learning_rate": 0.00017514, |
|
"loss": 0.0447, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.9424400574843053, |
|
"grad_norm": 0.07255307585000992, |
|
"learning_rate": 0.0001751, |
|
"loss": 0.0492, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.943952802359882, |
|
"grad_norm": 0.05136171355843544, |
|
"learning_rate": 0.00017506, |
|
"loss": 0.0438, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.9454655472354587, |
|
"grad_norm": 0.079404316842556, |
|
"learning_rate": 0.00017502000000000001, |
|
"loss": 0.0383, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.9469782921110355, |
|
"grad_norm": 0.10744167119264603, |
|
"learning_rate": 0.00017498, |
|
"loss": 0.0406, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.9484910369866122, |
|
"grad_norm": 0.09439695626497269, |
|
"learning_rate": 0.00017494, |
|
"loss": 0.0448, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.9500037818621889, |
|
"grad_norm": 0.07746788114309311, |
|
"learning_rate": 0.00017490000000000002, |
|
"loss": 0.0425, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.9515165267377657, |
|
"grad_norm": 0.161416694521904, |
|
"learning_rate": 0.00017486, |
|
"loss": 0.04, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.9530292716133424, |
|
"grad_norm": 0.05279407650232315, |
|
"learning_rate": 0.00017482, |
|
"loss": 0.0387, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.9545420164889191, |
|
"grad_norm": 0.06324402987957001, |
|
"learning_rate": 0.00017478, |
|
"loss": 0.0425, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.9560547613644959, |
|
"grad_norm": 0.08716294914484024, |
|
"learning_rate": 0.00017474000000000002, |
|
"loss": 0.0436, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.9575675062400726, |
|
"grad_norm": 0.08212625980377197, |
|
"learning_rate": 0.00017470000000000002, |
|
"loss": 0.0445, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.9590802511156493, |
|
"grad_norm": 0.08856002241373062, |
|
"learning_rate": 0.00017466, |
|
"loss": 0.0385, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.960592995991226, |
|
"grad_norm": 0.08907803148031235, |
|
"learning_rate": 0.00017462, |
|
"loss": 0.0451, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.9621057408668028, |
|
"grad_norm": 0.053175825625658035, |
|
"learning_rate": 0.00017458, |
|
"loss": 0.0428, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.9636184857423795, |
|
"grad_norm": 0.055600494146347046, |
|
"learning_rate": 0.00017454000000000002, |
|
"loss": 0.047, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.9651312306179562, |
|
"grad_norm": 0.10455228388309479, |
|
"learning_rate": 0.0001745, |
|
"loss": 0.0517, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.966643975493533, |
|
"grad_norm": 0.11780910938978195, |
|
"learning_rate": 0.00017446, |
|
"loss": 0.0414, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.9681567203691097, |
|
"grad_norm": 0.12388743460178375, |
|
"learning_rate": 0.00017442, |
|
"loss": 0.0438, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.9681567203691097, |
|
"eval_cer": 0.5869913004375724, |
|
"eval_loss": 0.03873522952198982, |
|
"eval_runtime": 10437.6142, |
|
"eval_samples_per_second": 2.017, |
|
"eval_steps_per_second": 0.252, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.9696694652446864, |
|
"grad_norm": 0.07916050404310226, |
|
"learning_rate": 0.00017438000000000002, |
|
"loss": 0.0402, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.9711822101202632, |
|
"grad_norm": 0.05646761879324913, |
|
"learning_rate": 0.00017434000000000001, |
|
"loss": 0.0425, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.9726949549958399, |
|
"grad_norm": 0.08374381810426712, |
|
"learning_rate": 0.0001743, |
|
"loss": 0.041, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.9742076998714166, |
|
"grad_norm": 0.06789222359657288, |
|
"learning_rate": 0.00017426, |
|
"loss": 0.0391, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.9757204447469934, |
|
"grad_norm": 0.0788172036409378, |
|
"learning_rate": 0.00017422, |
|
"loss": 0.0449, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.9772331896225701, |
|
"grad_norm": 0.1257173717021942, |
|
"learning_rate": 0.00017418000000000002, |
|
"loss": 0.0484, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.9787459344981468, |
|
"grad_norm": 0.05888710170984268, |
|
"learning_rate": 0.00017414, |
|
"loss": 0.0387, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.9802586793737236, |
|
"grad_norm": 0.07102910429239273, |
|
"learning_rate": 0.00017410000000000003, |
|
"loss": 0.0386, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.9817714242493003, |
|
"grad_norm": 0.058048397302627563, |
|
"learning_rate": 0.00017406, |
|
"loss": 0.0415, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.983284169124877, |
|
"grad_norm": 0.07222626358270645, |
|
"learning_rate": 0.00017402000000000002, |
|
"loss": 0.0378, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9847969140004538, |
|
"grad_norm": 0.06445878744125366, |
|
"learning_rate": 0.00017398, |
|
"loss": 0.0409, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.9863096588760305, |
|
"grad_norm": 0.09191201627254486, |
|
"learning_rate": 0.00017394, |
|
"loss": 0.0414, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.9878224037516072, |
|
"grad_norm": 0.08073204010725021, |
|
"learning_rate": 0.00017390000000000003, |
|
"loss": 0.0404, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.9893351486271841, |
|
"grad_norm": 0.08427068591117859, |
|
"learning_rate": 0.00017386, |
|
"loss": 0.0398, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.9908478935027608, |
|
"grad_norm": 0.19870494306087494, |
|
"learning_rate": 0.00017382, |
|
"loss": 0.0388, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.9923606383783375, |
|
"grad_norm": 0.34985288977622986, |
|
"learning_rate": 0.00017378, |
|
"loss": 0.051, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.9938733832539143, |
|
"grad_norm": 0.12121633440256119, |
|
"learning_rate": 0.00017374000000000003, |
|
"loss": 0.0385, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.995386128129491, |
|
"grad_norm": 0.140520840883255, |
|
"learning_rate": 0.00017370000000000002, |
|
"loss": 0.0417, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.9968988730050677, |
|
"grad_norm": 0.06655796617269516, |
|
"learning_rate": 0.00017366, |
|
"loss": 0.0394, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.9984116178806445, |
|
"grad_norm": 0.07498542964458466, |
|
"learning_rate": 0.00017362, |
|
"loss": 0.0419, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.9984116178806445, |
|
"eval_cer": 0.25282902555511905, |
|
"eval_loss": 0.038411665707826614, |
|
"eval_runtime": 10433.3935, |
|
"eval_samples_per_second": 2.018, |
|
"eval_steps_per_second": 0.252, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.9999243627562212, |
|
"grad_norm": 0.25646254420280457, |
|
"learning_rate": 0.00017358, |
|
"loss": 0.039, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.0014371076317978, |
|
"grad_norm": 0.07744245231151581, |
|
"learning_rate": 0.00017354000000000002, |
|
"loss": 0.0371, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.0029498525073746, |
|
"grad_norm": 0.11968632787466049, |
|
"learning_rate": 0.00017350000000000002, |
|
"loss": 0.0303, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.0044625973829513, |
|
"grad_norm": 0.07235859334468842, |
|
"learning_rate": 0.00017346, |
|
"loss": 0.0387, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.005975342258528, |
|
"grad_norm": 0.12598702311515808, |
|
"learning_rate": 0.00017342, |
|
"loss": 0.0355, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.0074880871341048, |
|
"grad_norm": 0.10832694917917252, |
|
"learning_rate": 0.00017338, |
|
"loss": 0.0297, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.0090008320096815, |
|
"grad_norm": 0.13988302648067474, |
|
"learning_rate": 0.00017334000000000002, |
|
"loss": 0.0352, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.0105135768852582, |
|
"grad_norm": 0.09534142911434174, |
|
"learning_rate": 0.0001733, |
|
"loss": 0.0308, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.012026321760835, |
|
"grad_norm": 0.05622931197285652, |
|
"learning_rate": 0.00017326, |
|
"loss": 0.0311, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.0135390666364117, |
|
"grad_norm": 0.06480368971824646, |
|
"learning_rate": 0.00017322, |
|
"loss": 0.033, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.0150518115119884, |
|
"grad_norm": 0.08531224727630615, |
|
"learning_rate": 0.00017318000000000002, |
|
"loss": 0.0345, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.0165645563875652, |
|
"grad_norm": 0.11494185030460358, |
|
"learning_rate": 0.00017314, |
|
"loss": 0.0292, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.0180773012631419, |
|
"grad_norm": 0.06993953883647919, |
|
"learning_rate": 0.0001731, |
|
"loss": 0.0343, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.0195900461387186, |
|
"grad_norm": 0.09449311345815659, |
|
"learning_rate": 0.00017306, |
|
"loss": 0.0285, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.0211027910142954, |
|
"grad_norm": 0.10550418496131897, |
|
"learning_rate": 0.00017302, |
|
"loss": 0.0337, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.022615535889872, |
|
"grad_norm": 0.06987041234970093, |
|
"learning_rate": 0.00017298000000000001, |
|
"loss": 0.0273, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.0241282807654488, |
|
"grad_norm": 0.08014168590307236, |
|
"learning_rate": 0.00017294, |
|
"loss": 0.0318, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 0.04886119067668915, |
|
"learning_rate": 0.00017290000000000003, |
|
"loss": 0.0318, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.0271537705166023, |
|
"grad_norm": 0.07735268771648407, |
|
"learning_rate": 0.00017286, |
|
"loss": 0.0377, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.028666515392179, |
|
"grad_norm": 0.07365155220031738, |
|
"learning_rate": 0.00017282000000000002, |
|
"loss": 0.0397, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.028666515392179, |
|
"eval_cer": 0.5956908628651482, |
|
"eval_loss": 0.03884879872202873, |
|
"eval_runtime": 10443.3198, |
|
"eval_samples_per_second": 2.016, |
|
"eval_steps_per_second": 0.252, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.0301792602677557, |
|
"grad_norm": 0.08235965669155121, |
|
"learning_rate": 0.00017278, |
|
"loss": 0.0356, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.0316920051433325, |
|
"grad_norm": 0.1203494668006897, |
|
"learning_rate": 0.00017274, |
|
"loss": 0.0391, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.0332047500189092, |
|
"grad_norm": 0.059709157794713974, |
|
"learning_rate": 0.00017270000000000002, |
|
"loss": 0.036, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.034717494894486, |
|
"grad_norm": 0.08380923420190811, |
|
"learning_rate": 0.00017266, |
|
"loss": 0.0311, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.0362302397700627, |
|
"grad_norm": 0.0642111599445343, |
|
"learning_rate": 0.00017262, |
|
"loss": 0.0296, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.0377429846456394, |
|
"grad_norm": 0.07701337337493896, |
|
"learning_rate": 0.00017258, |
|
"loss": 0.0318, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.0392557295212161, |
|
"grad_norm": 0.09674856811761856, |
|
"learning_rate": 0.00017254000000000003, |
|
"loss": 0.0294, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.0407684743967929, |
|
"grad_norm": 0.08543815463781357, |
|
"learning_rate": 0.00017250000000000002, |
|
"loss": 0.0322, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.0422812192723696, |
|
"grad_norm": 0.08181754499673843, |
|
"learning_rate": 0.00017246, |
|
"loss": 0.031, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.0437939641479463, |
|
"grad_norm": 0.07326922565698624, |
|
"learning_rate": 0.00017242, |
|
"loss": 0.0298, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.045306709023523, |
|
"grad_norm": 0.060128018260002136, |
|
"learning_rate": 0.00017238, |
|
"loss": 0.0351, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.0468194538990998, |
|
"grad_norm": 0.055250383913517, |
|
"learning_rate": 0.00017234000000000002, |
|
"loss": 0.0322, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.0483321987746765, |
|
"grad_norm": 0.07841707766056061, |
|
"learning_rate": 0.00017230000000000001, |
|
"loss": 0.0311, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.0498449436502533, |
|
"grad_norm": 0.06094701215624809, |
|
"learning_rate": 0.00017226, |
|
"loss": 0.0331, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.0513576885258302, |
|
"grad_norm": 0.0738435760140419, |
|
"learning_rate": 0.00017222, |
|
"loss": 0.0385, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.052870433401407, |
|
"grad_norm": 0.0741799846291542, |
|
"learning_rate": 0.00017218, |
|
"loss": 0.0332, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.0543831782769837, |
|
"grad_norm": 0.11769600957632065, |
|
"learning_rate": 0.00017214000000000002, |
|
"loss": 0.0288, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.0558959231525604, |
|
"grad_norm": 0.05547551065683365, |
|
"learning_rate": 0.0001721, |
|
"loss": 0.0351, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.0574086680281372, |
|
"grad_norm": 0.059602439403533936, |
|
"learning_rate": 0.00017206, |
|
"loss": 0.0315, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.0589214129037139, |
|
"grad_norm": 0.07523063570261002, |
|
"learning_rate": 0.00017202, |
|
"loss": 0.0344, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.0589214129037139, |
|
"eval_cer": 0.06192848124566072, |
|
"eval_loss": 0.03872867301106453, |
|
"eval_runtime": 10423.0915, |
|
"eval_samples_per_second": 2.02, |
|
"eval_steps_per_second": 0.253, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.0604341577792906, |
|
"grad_norm": 0.07334991544485092, |
|
"learning_rate": 0.00017198000000000002, |
|
"loss": 0.0394, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.0619469026548674, |
|
"grad_norm": 0.08875437080860138, |
|
"learning_rate": 0.00017194, |
|
"loss": 0.0316, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.063459647530444, |
|
"grad_norm": 0.06492207199335098, |
|
"learning_rate": 0.0001719, |
|
"loss": 0.0375, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.0649723924060208, |
|
"grad_norm": 0.08707519620656967, |
|
"learning_rate": 0.00017186, |
|
"loss": 0.0333, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.0664851372815976, |
|
"grad_norm": 0.06477733701467514, |
|
"learning_rate": 0.00017182, |
|
"loss": 0.036, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.0679978821571743, |
|
"grad_norm": 0.05914880335330963, |
|
"learning_rate": 0.00017178, |
|
"loss": 0.0307, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.069510627032751, |
|
"grad_norm": 0.11167873442173004, |
|
"learning_rate": 0.00017174, |
|
"loss": 0.0355, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.0710233719083277, |
|
"grad_norm": 0.08664342761039734, |
|
"learning_rate": 0.00017170000000000003, |
|
"loss": 0.0373, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.0725361167839045, |
|
"grad_norm": 0.06912154704332352, |
|
"learning_rate": 0.00017166, |
|
"loss": 0.0283, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.0740488616594812, |
|
"grad_norm": 0.09120757132768631, |
|
"learning_rate": 0.00017162000000000001, |
|
"loss": 0.0313, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.075561606535058, |
|
"grad_norm": 0.08159112185239792, |
|
"learning_rate": 0.00017158, |
|
"loss": 0.0413, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.0770743514106347, |
|
"grad_norm": 0.095944344997406, |
|
"learning_rate": 0.00017154, |
|
"loss": 0.0355, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.0785870962862114, |
|
"grad_norm": 0.10682930797338486, |
|
"learning_rate": 0.00017150000000000002, |
|
"loss": 0.0278, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.0800998411617881, |
|
"grad_norm": 0.06514004617929459, |
|
"learning_rate": 0.00017146, |
|
"loss": 0.0306, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.0816125860373649, |
|
"grad_norm": 0.07849156856536865, |
|
"learning_rate": 0.00017142, |
|
"loss": 0.0379, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.0831253309129416, |
|
"grad_norm": 0.0788741260766983, |
|
"learning_rate": 0.00017138, |
|
"loss": 0.032, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.0846380757885183, |
|
"grad_norm": 0.10495191067457199, |
|
"learning_rate": 0.00017134000000000002, |
|
"loss": 0.0358, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.086150820664095, |
|
"grad_norm": 0.07463409751653671, |
|
"learning_rate": 0.00017130000000000002, |
|
"loss": 0.0356, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.0876635655396718, |
|
"grad_norm": 0.08425049483776093, |
|
"learning_rate": 0.00017126, |
|
"loss": 0.0327, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.0891763104152485, |
|
"grad_norm": 0.07767146825790405, |
|
"learning_rate": 0.00017122, |
|
"loss": 0.034, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.0891763104152485, |
|
"eval_cer": 0.09758161553419167, |
|
"eval_loss": 0.037929706275463104, |
|
"eval_runtime": 10420.1284, |
|
"eval_samples_per_second": 2.02, |
|
"eval_steps_per_second": 0.253, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.0906890552908253, |
|
"grad_norm": 0.07770776748657227, |
|
"learning_rate": 0.00017118, |
|
"loss": 0.0321, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.092201800166402, |
|
"grad_norm": 0.06977003812789917, |
|
"learning_rate": 0.00017114000000000002, |
|
"loss": 0.0315, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.0937145450419787, |
|
"grad_norm": 0.077842116355896, |
|
"learning_rate": 0.0001711, |
|
"loss": 0.0317, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.0952272899175555, |
|
"grad_norm": 0.11414997279644012, |
|
"learning_rate": 0.00017106, |
|
"loss": 0.0392, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.0967400347931322, |
|
"grad_norm": 0.07568582892417908, |
|
"learning_rate": 0.00017102, |
|
"loss": 0.0369, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.098252779668709, |
|
"grad_norm": 0.07864728569984436, |
|
"learning_rate": 0.00017098000000000002, |
|
"loss": 0.038, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.0997655245442857, |
|
"grad_norm": 0.0852401927113533, |
|
"learning_rate": 0.00017094, |
|
"loss": 0.0323, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.1012782694198624, |
|
"grad_norm": 0.06548303365707397, |
|
"learning_rate": 0.0001709, |
|
"loss": 0.0373, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.1027910142954391, |
|
"grad_norm": 0.10153812170028687, |
|
"learning_rate": 0.00017086, |
|
"loss": 0.0321, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.1043037591710159, |
|
"grad_norm": 0.09032442420721054, |
|
"learning_rate": 0.00017082, |
|
"loss": 0.0306, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.1058165040465926, |
|
"grad_norm": 0.12109789252281189, |
|
"learning_rate": 0.00017078000000000001, |
|
"loss": 0.0355, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.1073292489221693, |
|
"grad_norm": 0.08515240997076035, |
|
"learning_rate": 0.00017074, |
|
"loss": 0.0374, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.108841993797746, |
|
"grad_norm": 0.06838446855545044, |
|
"learning_rate": 0.0001707, |
|
"loss": 0.0309, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.1103547386733228, |
|
"grad_norm": 0.10029911994934082, |
|
"learning_rate": 0.00017066, |
|
"loss": 0.0377, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.1118674835488995, |
|
"grad_norm": 0.08499938994646072, |
|
"learning_rate": 0.00017062, |
|
"loss": 0.0317, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.1133802284244763, |
|
"grad_norm": 0.10972133278846741, |
|
"learning_rate": 0.00017058, |
|
"loss": 0.0344, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.114892973300053, |
|
"grad_norm": 0.06848263740539551, |
|
"learning_rate": 0.00017054, |
|
"loss": 0.0356, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.1164057181756297, |
|
"grad_norm": 0.06813491135835648, |
|
"learning_rate": 0.00017050000000000002, |
|
"loss": 0.0291, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.1179184630512065, |
|
"grad_norm": 0.053215883672237396, |
|
"learning_rate": 0.00017046, |
|
"loss": 0.0297, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.1194312079267832, |
|
"grad_norm": 0.08575928211212158, |
|
"learning_rate": 0.00017042, |
|
"loss": 0.0378, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.1194312079267832, |
|
"eval_cer": 0.05163898174846133, |
|
"eval_loss": 0.03768303617835045, |
|
"eval_runtime": 10418.7834, |
|
"eval_samples_per_second": 2.021, |
|
"eval_steps_per_second": 0.253, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.12094395280236, |
|
"grad_norm": 0.07621601223945618, |
|
"learning_rate": 0.00017038, |
|
"loss": 0.032, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.1224566976779367, |
|
"grad_norm": 0.11499703675508499, |
|
"learning_rate": 0.00017034, |
|
"loss": 0.0331, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.1239694425535134, |
|
"grad_norm": 0.08789568394422531, |
|
"learning_rate": 0.00017030000000000002, |
|
"loss": 0.0332, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.1254821874290901, |
|
"grad_norm": 0.0887342318892479, |
|
"learning_rate": 0.00017025999999999999, |
|
"loss": 0.0374, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.1269949323046669, |
|
"grad_norm": 0.11794856935739517, |
|
"learning_rate": 0.00017022, |
|
"loss": 0.0347, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.1285076771802436, |
|
"grad_norm": 0.07593784481287003, |
|
"learning_rate": 0.00017018, |
|
"loss": 0.0323, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.1300204220558203, |
|
"grad_norm": 0.06868909299373627, |
|
"learning_rate": 0.00017014000000000002, |
|
"loss": 0.0311, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.131533166931397, |
|
"grad_norm": 0.1010032370686531, |
|
"learning_rate": 0.00017010000000000001, |
|
"loss": 0.0333, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.1330459118069738, |
|
"grad_norm": 0.08664656430482864, |
|
"learning_rate": 0.00017006, |
|
"loss": 0.0358, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.1345586566825505, |
|
"grad_norm": 0.09153386205434799, |
|
"learning_rate": 0.00017002, |
|
"loss": 0.0288, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.1360714015581272, |
|
"grad_norm": 0.10042116045951843, |
|
"learning_rate": 0.00016998, |
|
"loss": 0.0324, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.137584146433704, |
|
"grad_norm": 0.09703629463911057, |
|
"learning_rate": 0.00016994000000000002, |
|
"loss": 0.0356, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.1390968913092807, |
|
"grad_norm": 0.07961410284042358, |
|
"learning_rate": 0.0001699, |
|
"loss": 0.0279, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.1406096361848574, |
|
"grad_norm": 0.09164062142372131, |
|
"learning_rate": 0.00016986000000000003, |
|
"loss": 0.033, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.1421223810604342, |
|
"grad_norm": 0.0804910659790039, |
|
"learning_rate": 0.00016982, |
|
"loss": 0.033, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.143635125936011, |
|
"grad_norm": 0.07923970371484756, |
|
"learning_rate": 0.00016978000000000002, |
|
"loss": 0.0366, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.1451478708115876, |
|
"grad_norm": 0.1198810487985611, |
|
"learning_rate": 0.00016974, |
|
"loss": 0.0361, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.1466606156871644, |
|
"grad_norm": 0.08409520238637924, |
|
"learning_rate": 0.0001697, |
|
"loss": 0.0323, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.148173360562741, |
|
"grad_norm": 0.09524326026439667, |
|
"learning_rate": 0.00016966000000000003, |
|
"loss": 0.0338, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.1496861054383178, |
|
"grad_norm": 0.0670013502240181, |
|
"learning_rate": 0.00016962, |
|
"loss": 0.033, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.1496861054383178, |
|
"eval_cer": 0.04317970118571997, |
|
"eval_loss": 0.03775278851389885, |
|
"eval_runtime": 10413.2831, |
|
"eval_samples_per_second": 2.022, |
|
"eval_steps_per_second": 0.253, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.1511988503138946, |
|
"grad_norm": 0.07331959903240204, |
|
"learning_rate": 0.00016958, |
|
"loss": 0.0331, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.1527115951894713, |
|
"grad_norm": 0.06851343810558319, |
|
"learning_rate": 0.00016954, |
|
"loss": 0.0306, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.154224340065048, |
|
"grad_norm": 0.07627418637275696, |
|
"learning_rate": 0.00016950000000000003, |
|
"loss": 0.0334, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.1557370849406248, |
|
"grad_norm": 0.08676694333553314, |
|
"learning_rate": 0.00016946000000000002, |
|
"loss": 0.0322, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.1572498298162015, |
|
"grad_norm": 0.07023747265338898, |
|
"learning_rate": 0.00016942000000000001, |
|
"loss": 0.0358, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.1587625746917782, |
|
"grad_norm": 0.07805462926626205, |
|
"learning_rate": 0.00016938, |
|
"loss": 0.0325, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.160275319567355, |
|
"grad_norm": 0.0867529958486557, |
|
"learning_rate": 0.00016934, |
|
"loss": 0.0318, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.1617880644429317, |
|
"grad_norm": 0.08449842035770416, |
|
"learning_rate": 0.00016930000000000002, |
|
"loss": 0.0408, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.1633008093185084, |
|
"grad_norm": 0.08054087311029434, |
|
"learning_rate": 0.00016926000000000002, |
|
"loss": 0.0306, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.1648135541940852, |
|
"grad_norm": 0.08645962178707123, |
|
"learning_rate": 0.00016922, |
|
"loss": 0.0299, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.166326299069662, |
|
"grad_norm": 0.0892554521560669, |
|
"learning_rate": 0.00016918, |
|
"loss": 0.0352, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.1678390439452386, |
|
"grad_norm": 0.06643500924110413, |
|
"learning_rate": 0.00016914, |
|
"loss": 0.0284, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.1693517888208154, |
|
"grad_norm": 0.06918591260910034, |
|
"learning_rate": 0.00016910000000000002, |
|
"loss": 0.0278, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.170864533696392, |
|
"grad_norm": 0.08370740711688995, |
|
"learning_rate": 0.00016906, |
|
"loss": 0.0316, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.1723772785719688, |
|
"grad_norm": 0.053777385503053665, |
|
"learning_rate": 0.00016902, |
|
"loss": 0.036, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.1738900234475456, |
|
"grad_norm": 0.0665329247713089, |
|
"learning_rate": 0.00016898, |
|
"loss": 0.0333, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.1754027683231223, |
|
"grad_norm": 0.07484222948551178, |
|
"learning_rate": 0.00016894000000000002, |
|
"loss": 0.0319, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.176915513198699, |
|
"grad_norm": 0.08218715339899063, |
|
"learning_rate": 0.0001689, |
|
"loss": 0.0308, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.1784282580742758, |
|
"grad_norm": 0.06873024255037308, |
|
"learning_rate": 0.00016886, |
|
"loss": 0.0349, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.1799410029498525, |
|
"grad_norm": 0.07846609503030777, |
|
"learning_rate": 0.00016882, |
|
"loss": 0.0359, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.1799410029498525, |
|
"eval_cer": 0.1078840865459451, |
|
"eval_loss": 0.03878456726670265, |
|
"eval_runtime": 10398.1972, |
|
"eval_samples_per_second": 2.025, |
|
"eval_steps_per_second": 0.253, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.1814537478254292, |
|
"grad_norm": 0.06112883612513542, |
|
"learning_rate": 0.00016878, |
|
"loss": 0.0324, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.182966492701006, |
|
"grad_norm": 0.07065495103597641, |
|
"learning_rate": 0.00016874000000000001, |
|
"loss": 0.0333, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.1844792375765827, |
|
"grad_norm": 0.10944267362356186, |
|
"learning_rate": 0.0001687, |
|
"loss": 0.0322, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.1859919824521594, |
|
"grad_norm": 0.08741329610347748, |
|
"learning_rate": 0.00016866000000000003, |
|
"loss": 0.0339, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.1875047273277362, |
|
"grad_norm": 0.06457091867923737, |
|
"learning_rate": 0.00016862, |
|
"loss": 0.0345, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.1890174722033129, |
|
"grad_norm": 0.0570165179669857, |
|
"learning_rate": 0.00016858000000000002, |
|
"loss": 0.032, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.1905302170788896, |
|
"grad_norm": 0.07944530248641968, |
|
"learning_rate": 0.00016854, |
|
"loss": 0.0347, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.1920429619544664, |
|
"grad_norm": 0.06981216371059418, |
|
"learning_rate": 0.0001685, |
|
"loss": 0.0329, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.193555706830043, |
|
"grad_norm": 0.052252449095249176, |
|
"learning_rate": 0.00016846000000000002, |
|
"loss": 0.0327, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.1950684517056198, |
|
"grad_norm": 0.05333190783858299, |
|
"learning_rate": 0.00016842, |
|
"loss": 0.0269, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.1965811965811965, |
|
"grad_norm": 0.18012838065624237, |
|
"learning_rate": 0.00016838, |
|
"loss": 0.0324, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.1980939414567733, |
|
"grad_norm": 0.06892676651477814, |
|
"learning_rate": 0.00016834, |
|
"loss": 0.0294, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.19960668633235, |
|
"grad_norm": 0.07558593899011612, |
|
"learning_rate": 0.00016830000000000003, |
|
"loss": 0.0371, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.2011194312079267, |
|
"grad_norm": 0.08046507835388184, |
|
"learning_rate": 0.00016826000000000002, |
|
"loss": 0.0311, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.2026321760835035, |
|
"grad_norm": 0.07986424118280411, |
|
"learning_rate": 0.00016822, |
|
"loss": 0.0357, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.2041449209590802, |
|
"grad_norm": 0.07394195348024368, |
|
"learning_rate": 0.00016818, |
|
"loss": 0.0341, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.205657665834657, |
|
"grad_norm": 0.06269822269678116, |
|
"learning_rate": 0.00016814, |
|
"loss": 0.0329, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.2071704107102337, |
|
"grad_norm": 0.07179784774780273, |
|
"learning_rate": 0.00016810000000000002, |
|
"loss": 0.0329, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.2086831555858104, |
|
"grad_norm": 0.10174887627363205, |
|
"learning_rate": 0.00016806000000000001, |
|
"loss": 0.0262, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.2101959004613871, |
|
"grad_norm": 0.06536643952131271, |
|
"learning_rate": 0.00016802, |
|
"loss": 0.034, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.2101959004613871, |
|
"eval_cer": 0.15941559003095868, |
|
"eval_loss": 0.03837862238287926, |
|
"eval_runtime": 10390.1541, |
|
"eval_samples_per_second": 2.026, |
|
"eval_steps_per_second": 0.253, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.2117086453369639, |
|
"grad_norm": 0.13079065084457397, |
|
"learning_rate": 0.00016798, |
|
"loss": 0.037, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.2132213902125406, |
|
"grad_norm": 0.07293607294559479, |
|
"learning_rate": 0.00016794000000000002, |
|
"loss": 0.0295, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.2147341350881173, |
|
"grad_norm": 0.07390507310628891, |
|
"learning_rate": 0.00016790000000000002, |
|
"loss": 0.0309, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.216246879963694, |
|
"grad_norm": 0.22675780951976776, |
|
"learning_rate": 0.00016786, |
|
"loss": 0.0341, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.2177596248392708, |
|
"grad_norm": 0.06630139797925949, |
|
"learning_rate": 0.00016782, |
|
"loss": 0.0359, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.2192723697148475, |
|
"grad_norm": 0.09231210500001907, |
|
"learning_rate": 0.00016778, |
|
"loss": 0.0325, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.2207851145904243, |
|
"grad_norm": 0.067893847823143, |
|
"learning_rate": 0.00016774000000000002, |
|
"loss": 0.0338, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.222297859466001, |
|
"grad_norm": 0.16284491121768951, |
|
"learning_rate": 0.0001677, |
|
"loss": 0.0362, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.2238106043415777, |
|
"grad_norm": 0.07695828378200531, |
|
"learning_rate": 0.00016766, |
|
"loss": 0.0367, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.2253233492171545, |
|
"grad_norm": 0.07685229927301407, |
|
"learning_rate": 0.00016762, |
|
"loss": 0.0383, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.2268360940927312, |
|
"grad_norm": 0.08510534465312958, |
|
"learning_rate": 0.00016758, |
|
"loss": 0.0346, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.228348838968308, |
|
"grad_norm": 0.16018977761268616, |
|
"learning_rate": 0.00016754, |
|
"loss": 0.0314, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.2298615838438847, |
|
"grad_norm": 0.10644716769456863, |
|
"learning_rate": 0.0001675, |
|
"loss": 0.0427, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.2313743287194614, |
|
"grad_norm": 0.06390608847141266, |
|
"learning_rate": 0.00016746000000000003, |
|
"loss": 0.0333, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.2328870735950381, |
|
"grad_norm": 0.1173742264509201, |
|
"learning_rate": 0.00016742, |
|
"loss": 0.0335, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.2343998184706149, |
|
"grad_norm": 0.08506636321544647, |
|
"learning_rate": 0.00016738000000000001, |
|
"loss": 0.0393, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.2359125633461916, |
|
"grad_norm": 0.08176897466182709, |
|
"learning_rate": 0.00016734, |
|
"loss": 0.0306, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.2374253082217683, |
|
"grad_norm": 0.11272590607404709, |
|
"learning_rate": 0.0001673, |
|
"loss": 0.0368, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.238938053097345, |
|
"grad_norm": 0.10923430323600769, |
|
"learning_rate": 0.00016726000000000002, |
|
"loss": 0.0389, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.2404507979729218, |
|
"grad_norm": 0.05665091425180435, |
|
"learning_rate": 0.00016722, |
|
"loss": 0.0352, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.2404507979729218, |
|
"eval_cer": 0.195939668868118, |
|
"eval_loss": 0.03837649151682854, |
|
"eval_runtime": 10379.5895, |
|
"eval_samples_per_second": 2.028, |
|
"eval_steps_per_second": 0.254, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.2419635428484985, |
|
"grad_norm": 0.08927123993635178, |
|
"learning_rate": 0.00016718, |
|
"loss": 0.0356, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.2434762877240753, |
|
"grad_norm": 0.09398534893989563, |
|
"learning_rate": 0.00016714, |
|
"loss": 0.0365, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.244989032599652, |
|
"grad_norm": 0.0905461311340332, |
|
"learning_rate": 0.00016710000000000002, |
|
"loss": 0.0335, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.2465017774752287, |
|
"grad_norm": 0.09033455699682236, |
|
"learning_rate": 0.00016706000000000002, |
|
"loss": 0.0376, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.2480145223508055, |
|
"grad_norm": 0.08217161148786545, |
|
"learning_rate": 0.00016702, |
|
"loss": 0.032, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.2495272672263822, |
|
"grad_norm": 0.0694824755191803, |
|
"learning_rate": 0.00016698, |
|
"loss": 0.0354, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.2510400121019591, |
|
"grad_norm": 0.08535374701023102, |
|
"learning_rate": 0.00016694, |
|
"loss": 0.0288, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.2525527569775359, |
|
"grad_norm": 0.10267391055822372, |
|
"learning_rate": 0.00016690000000000002, |
|
"loss": 0.0331, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.2540655018531126, |
|
"grad_norm": 0.0720328763127327, |
|
"learning_rate": 0.00016686, |
|
"loss": 0.0324, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.2555782467286893, |
|
"grad_norm": 0.15617039799690247, |
|
"learning_rate": 0.00016682, |
|
"loss": 0.0374, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.257090991604266, |
|
"grad_norm": 0.09863468259572983, |
|
"learning_rate": 0.00016678, |
|
"loss": 0.0363, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.2586037364798428, |
|
"grad_norm": 0.08562877029180527, |
|
"learning_rate": 0.00016674000000000002, |
|
"loss": 0.0347, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.2601164813554195, |
|
"grad_norm": 0.09868349879980087, |
|
"learning_rate": 0.0001667, |
|
"loss": 0.0362, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.2616292262309963, |
|
"grad_norm": 0.09744835644960403, |
|
"learning_rate": 0.00016666, |
|
"loss": 0.0364, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.263141971106573, |
|
"grad_norm": 0.19243358075618744, |
|
"learning_rate": 0.00016662, |
|
"loss": 0.0378, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.2646547159821497, |
|
"grad_norm": 0.06478457897901535, |
|
"learning_rate": 0.00016658, |
|
"loss": 0.033, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.2661674608577265, |
|
"grad_norm": 0.09313791990280151, |
|
"learning_rate": 0.00016654000000000001, |
|
"loss": 0.04, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.2676802057333032, |
|
"grad_norm": 0.0906825065612793, |
|
"learning_rate": 0.0001665, |
|
"loss": 0.0341, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.26919295060888, |
|
"grad_norm": 0.08549359440803528, |
|
"learning_rate": 0.00016646000000000003, |
|
"loss": 0.0376, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.2707056954844567, |
|
"grad_norm": 0.0915452241897583, |
|
"learning_rate": 0.00016642, |
|
"loss": 0.029, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.2707056954844567, |
|
"eval_cer": 0.19141261028875828, |
|
"eval_loss": 0.03777679055929184, |
|
"eval_runtime": 10360.722, |
|
"eval_samples_per_second": 2.032, |
|
"eval_steps_per_second": 0.254, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.2722184403600334, |
|
"grad_norm": 0.07039971649646759, |
|
"learning_rate": 0.00016638, |
|
"loss": 0.0355, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.2737311852356101, |
|
"grad_norm": 0.08890164643526077, |
|
"learning_rate": 0.00016634, |
|
"loss": 0.03, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.2752439301111869, |
|
"grad_norm": 0.07611805945634842, |
|
"learning_rate": 0.0001663, |
|
"loss": 0.037, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.2767566749867636, |
|
"grad_norm": 0.10268427431583405, |
|
"learning_rate": 0.00016626000000000002, |
|
"loss": 0.0346, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.2782694198623403, |
|
"grad_norm": 0.07185817509889603, |
|
"learning_rate": 0.00016622, |
|
"loss": 0.0334, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.279782164737917, |
|
"grad_norm": 0.09720634669065475, |
|
"learning_rate": 0.00016618, |
|
"loss": 0.0328, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.2812949096134938, |
|
"grad_norm": 0.08373324573040009, |
|
"learning_rate": 0.00016614, |
|
"loss": 0.0342, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.2828076544890705, |
|
"grad_norm": 0.05525701493024826, |
|
"learning_rate": 0.0001661, |
|
"loss": 0.0295, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.2843203993646473, |
|
"grad_norm": 0.08398504555225372, |
|
"learning_rate": 0.00016606000000000002, |
|
"loss": 0.0336, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.285833144240224, |
|
"grad_norm": 0.11384329944849014, |
|
"learning_rate": 0.00016601999999999999, |
|
"loss": 0.0335, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.2873458891158007, |
|
"grad_norm": 0.05366117134690285, |
|
"learning_rate": 0.00016598, |
|
"loss": 0.0303, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.2888586339913775, |
|
"grad_norm": 0.09270923584699631, |
|
"learning_rate": 0.00016594, |
|
"loss": 0.0309, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.2903713788669542, |
|
"grad_norm": 0.09621911495923996, |
|
"learning_rate": 0.00016590000000000002, |
|
"loss": 0.0326, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.291884123742531, |
|
"grad_norm": 0.09750113636255264, |
|
"learning_rate": 0.00016586000000000001, |
|
"loss": 0.032, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.2933968686181077, |
|
"grad_norm": 0.08557499945163727, |
|
"learning_rate": 0.00016582, |
|
"loss": 0.0331, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.2949096134936844, |
|
"grad_norm": 0.0842200294137001, |
|
"learning_rate": 0.00016578, |
|
"loss": 0.0339, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.2964223583692611, |
|
"grad_norm": 0.06341574341058731, |
|
"learning_rate": 0.00016574, |
|
"loss": 0.0369, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.2979351032448379, |
|
"grad_norm": 0.07687686383724213, |
|
"learning_rate": 0.00016570000000000002, |
|
"loss": 0.0291, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.2994478481204146, |
|
"grad_norm": 0.07118263840675354, |
|
"learning_rate": 0.00016566, |
|
"loss": 0.0331, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.3009605929959913, |
|
"grad_norm": 0.10967772454023361, |
|
"learning_rate": 0.00016562, |
|
"loss": 0.04, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.3009605929959913, |
|
"eval_cer": 0.15955704130871465, |
|
"eval_loss": 0.03786647692322731, |
|
"eval_runtime": 10383.8112, |
|
"eval_samples_per_second": 2.027, |
|
"eval_steps_per_second": 0.253, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.302473337871568, |
|
"grad_norm": 0.09102348983287811, |
|
"learning_rate": 0.00016558, |
|
"loss": 0.0337, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.3039860827471448, |
|
"grad_norm": 0.0596625916659832, |
|
"learning_rate": 0.00016554000000000002, |
|
"loss": 0.0341, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.3054988276227215, |
|
"grad_norm": 0.0790410116314888, |
|
"learning_rate": 0.0001655, |
|
"loss": 0.0348, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.3070115724982982, |
|
"grad_norm": 0.08243832737207413, |
|
"learning_rate": 0.00016546, |
|
"loss": 0.0351, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.308524317373875, |
|
"grad_norm": 0.07890262454748154, |
|
"learning_rate": 0.00016542, |
|
"loss": 0.0331, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.3100370622494517, |
|
"grad_norm": 0.06424404680728912, |
|
"learning_rate": 0.00016538, |
|
"loss": 0.032, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.3115498071250284, |
|
"grad_norm": 0.08828658610582352, |
|
"learning_rate": 0.00016534, |
|
"loss": 0.0351, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.3130625520006052, |
|
"grad_norm": 0.07190482318401337, |
|
"learning_rate": 0.0001653, |
|
"loss": 0.0334, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.314575296876182, |
|
"grad_norm": 0.1207108125090599, |
|
"learning_rate": 0.00016526000000000003, |
|
"loss": 0.0333, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.3160880417517586, |
|
"grad_norm": 0.057197410613298416, |
|
"learning_rate": 0.00016522, |
|
"loss": 0.0273, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.3176007866273354, |
|
"grad_norm": 0.0845530703663826, |
|
"learning_rate": 0.00016518000000000001, |
|
"loss": 0.0398, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.319113531502912, |
|
"grad_norm": 0.07357069104909897, |
|
"learning_rate": 0.00016514, |
|
"loss": 0.0334, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.3206262763784888, |
|
"grad_norm": 0.07419273257255554, |
|
"learning_rate": 0.0001651, |
|
"loss": 0.0267, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.3221390212540656, |
|
"grad_norm": 0.08293847739696503, |
|
"learning_rate": 0.00016506000000000002, |
|
"loss": 0.0286, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.3236517661296423, |
|
"grad_norm": 0.09437254071235657, |
|
"learning_rate": 0.00016502, |
|
"loss": 0.0411, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.325164511005219, |
|
"grad_norm": 0.06988554447889328, |
|
"learning_rate": 0.00016498, |
|
"loss": 0.0288, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.3266772558807958, |
|
"grad_norm": 0.11081293970346451, |
|
"learning_rate": 0.00016494, |
|
"loss": 0.0342, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.3281900007563725, |
|
"grad_norm": 0.0911073237657547, |
|
"learning_rate": 0.0001649, |
|
"loss": 0.0324, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.3297027456319492, |
|
"grad_norm": 0.08337673544883728, |
|
"learning_rate": 0.00016486000000000002, |
|
"loss": 0.0297, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.331215490507526, |
|
"grad_norm": 0.09077824652194977, |
|
"learning_rate": 0.00016482, |
|
"loss": 0.0319, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.331215490507526, |
|
"eval_cer": 0.050760007214632856, |
|
"eval_loss": 0.03842457756400108, |
|
"eval_runtime": 10378.6583, |
|
"eval_samples_per_second": 2.028, |
|
"eval_steps_per_second": 0.254, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.3327282353831027, |
|
"grad_norm": 0.12336084991693497, |
|
"learning_rate": 0.00016478, |
|
"loss": 0.0371, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.3342409802586794, |
|
"grad_norm": 0.07978357374668121, |
|
"learning_rate": 0.00016474, |
|
"loss": 0.0349, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.3357537251342562, |
|
"grad_norm": 0.1073361411690712, |
|
"learning_rate": 0.00016470000000000002, |
|
"loss": 0.0417, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.337266470009833, |
|
"grad_norm": 0.05822708085179329, |
|
"learning_rate": 0.00016466, |
|
"loss": 0.0302, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.3387792148854096, |
|
"grad_norm": 0.06241593137383461, |
|
"learning_rate": 0.00016462, |
|
"loss": 0.0365, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.3402919597609864, |
|
"grad_norm": 0.10107123106718063, |
|
"learning_rate": 0.00016458, |
|
"loss": 0.0345, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.341804704636563, |
|
"grad_norm": 0.09659604728221893, |
|
"learning_rate": 0.00016454, |
|
"loss": 0.0324, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.3433174495121398, |
|
"grad_norm": 0.07501540333032608, |
|
"learning_rate": 0.00016450000000000001, |
|
"loss": 0.0317, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.3448301943877166, |
|
"grad_norm": 0.071120485663414, |
|
"learning_rate": 0.00016446, |
|
"loss": 0.0299, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.3463429392632933, |
|
"grad_norm": 0.07235920429229736, |
|
"learning_rate": 0.00016442000000000003, |
|
"loss": 0.0337, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.34785568413887, |
|
"grad_norm": 0.08588097244501114, |
|
"learning_rate": 0.00016438, |
|
"loss": 0.0302, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.3493684290144468, |
|
"grad_norm": 0.052244190126657486, |
|
"learning_rate": 0.00016434000000000002, |
|
"loss": 0.0326, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.3508811738900235, |
|
"grad_norm": 0.0702931210398674, |
|
"learning_rate": 0.0001643, |
|
"loss": 0.0372, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.3523939187656002, |
|
"grad_norm": 0.10441485792398453, |
|
"learning_rate": 0.00016426, |
|
"loss": 0.037, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.353906663641177, |
|
"grad_norm": 0.10514800250530243, |
|
"learning_rate": 0.00016422000000000002, |
|
"loss": 0.037, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.3554194085167537, |
|
"grad_norm": 0.07011867314577103, |
|
"learning_rate": 0.00016418, |
|
"loss": 0.0314, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.3569321533923304, |
|
"grad_norm": 0.06335943937301636, |
|
"learning_rate": 0.00016414, |
|
"loss": 0.0311, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.3584448982679072, |
|
"grad_norm": 0.07194424420595169, |
|
"learning_rate": 0.0001641, |
|
"loss": 0.0336, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.3599576431434839, |
|
"grad_norm": 0.07171431183815002, |
|
"learning_rate": 0.00016406000000000003, |
|
"loss": 0.0312, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.3614703880190606, |
|
"grad_norm": 0.14893119037151337, |
|
"learning_rate": 0.00016402000000000002, |
|
"loss": 0.0348, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3614703880190606, |
|
"eval_cer": 0.23852391576669063, |
|
"eval_loss": 0.03737874701619148, |
|
"eval_runtime": 10378.6671, |
|
"eval_samples_per_second": 2.028, |
|
"eval_steps_per_second": 0.254, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3629831328946374, |
|
"grad_norm": 0.09854207932949066, |
|
"learning_rate": 0.00016398, |
|
"loss": 0.0334, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.364495877770214, |
|
"grad_norm": 0.0829731673002243, |
|
"learning_rate": 0.00016394, |
|
"loss": 0.0367, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.3660086226457908, |
|
"grad_norm": 0.05378841981291771, |
|
"learning_rate": 0.0001639, |
|
"loss": 0.0328, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.3675213675213675, |
|
"grad_norm": 0.08590775728225708, |
|
"learning_rate": 0.00016386000000000002, |
|
"loss": 0.0337, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.3690341123969443, |
|
"grad_norm": 0.06473217159509659, |
|
"learning_rate": 0.00016382000000000001, |
|
"loss": 0.0309, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.370546857272521, |
|
"grad_norm": 0.14496292173862457, |
|
"learning_rate": 0.00016378, |
|
"loss": 0.0362, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.3720596021480977, |
|
"grad_norm": 0.0658840760588646, |
|
"learning_rate": 0.00016374, |
|
"loss": 0.0316, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.3735723470236745, |
|
"grad_norm": 0.0722692534327507, |
|
"learning_rate": 0.00016370000000000002, |
|
"loss": 0.0321, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.3750850918992512, |
|
"grad_norm": 0.0751873180270195, |
|
"learning_rate": 0.00016366000000000002, |
|
"loss": 0.0357, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.376597836774828, |
|
"grad_norm": 0.07309116423130035, |
|
"learning_rate": 0.00016362, |
|
"loss": 0.0329, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.3781105816504047, |
|
"grad_norm": 0.09205902367830276, |
|
"learning_rate": 0.00016358, |
|
"loss": 0.0311, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.3796233265259814, |
|
"grad_norm": 0.06787604093551636, |
|
"learning_rate": 0.00016354, |
|
"loss": 0.0308, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.3811360714015581, |
|
"grad_norm": 0.08365906029939651, |
|
"learning_rate": 0.00016350000000000002, |
|
"loss": 0.0344, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.3826488162771349, |
|
"grad_norm": 0.07461418211460114, |
|
"learning_rate": 0.00016346, |
|
"loss": 0.0286, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.3841615611527116, |
|
"grad_norm": 0.11862760782241821, |
|
"learning_rate": 0.00016342, |
|
"loss": 0.0361, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.3856743060282883, |
|
"grad_norm": 0.07170487195253372, |
|
"learning_rate": 0.00016338, |
|
"loss": 0.0335, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.387187050903865, |
|
"grad_norm": 0.05578533932566643, |
|
"learning_rate": 0.00016334, |
|
"loss": 0.0311, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.3886997957794418, |
|
"grad_norm": 0.08838359266519547, |
|
"learning_rate": 0.0001633, |
|
"loss": 0.0341, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.3902125406550185, |
|
"grad_norm": 0.09284081310033798, |
|
"learning_rate": 0.00016326, |
|
"loss": 0.0322, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.3917252855305953, |
|
"grad_norm": 0.07425800710916519, |
|
"learning_rate": 0.00016322000000000003, |
|
"loss": 0.0319, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.3917252855305953, |
|
"eval_cer": 0.11210909414354649, |
|
"eval_loss": 0.036687206476926804, |
|
"eval_runtime": 10439.2076, |
|
"eval_samples_per_second": 2.017, |
|
"eval_steps_per_second": 0.252, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.393238030406172, |
|
"grad_norm": 0.0754477009177208, |
|
"learning_rate": 0.00016318, |
|
"loss": 0.0355, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.3947507752817487, |
|
"grad_norm": 0.06408898532390594, |
|
"learning_rate": 0.00016314, |
|
"loss": 0.0345, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.3962635201573255, |
|
"grad_norm": 0.06003674492239952, |
|
"learning_rate": 0.0001631, |
|
"loss": 0.0316, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.3977762650329022, |
|
"grad_norm": 0.07409165799617767, |
|
"learning_rate": 0.00016306, |
|
"loss": 0.03, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.399289009908479, |
|
"grad_norm": 0.07411226630210876, |
|
"learning_rate": 0.00016302000000000002, |
|
"loss": 0.0325, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.4008017547840557, |
|
"grad_norm": 0.09041300415992737, |
|
"learning_rate": 0.00016298, |
|
"loss": 0.034, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.4023144996596324, |
|
"grad_norm": 0.0684356689453125, |
|
"learning_rate": 0.00016294, |
|
"loss": 0.0345, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 1.4038272445352091, |
|
"grad_norm": 0.08621818572282791, |
|
"learning_rate": 0.0001629, |
|
"loss": 0.0287, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.4053399894107859, |
|
"grad_norm": 0.09592179954051971, |
|
"learning_rate": 0.00016286000000000002, |
|
"loss": 0.0371, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 1.4068527342863626, |
|
"grad_norm": 0.061489395797252655, |
|
"learning_rate": 0.00016282000000000002, |
|
"loss": 0.0297, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.4083654791619393, |
|
"grad_norm": 0.08933687955141068, |
|
"learning_rate": 0.00016278, |
|
"loss": 0.0329, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 1.409878224037516, |
|
"grad_norm": 0.06542832404375076, |
|
"learning_rate": 0.00016274, |
|
"loss": 0.0359, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.4113909689130928, |
|
"grad_norm": 0.10515543818473816, |
|
"learning_rate": 0.0001627, |
|
"loss": 0.0282, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 1.4129037137886695, |
|
"grad_norm": 0.11535684019327164, |
|
"learning_rate": 0.00016266000000000002, |
|
"loss": 0.0346, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.4144164586642463, |
|
"grad_norm": 0.10359009355306625, |
|
"learning_rate": 0.00016262, |
|
"loss": 0.0326, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.415929203539823, |
|
"grad_norm": 0.08905740082263947, |
|
"learning_rate": 0.00016258, |
|
"loss": 0.0353, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.4174419484153997, |
|
"grad_norm": 0.0570446141064167, |
|
"learning_rate": 0.00016254, |
|
"loss": 0.0282, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 1.4189546932909765, |
|
"grad_norm": 0.0748140960931778, |
|
"learning_rate": 0.00016250000000000002, |
|
"loss": 0.0304, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.4204674381665532, |
|
"grad_norm": 0.07355400919914246, |
|
"learning_rate": 0.00016246, |
|
"loss": 0.031, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 1.42198018304213, |
|
"grad_norm": 0.09431416541337967, |
|
"learning_rate": 0.00016242, |
|
"loss": 0.0355, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.42198018304213, |
|
"eval_cer": 0.09460805024547048, |
|
"eval_loss": 0.03653513640165329, |
|
"eval_runtime": 10519.6629, |
|
"eval_samples_per_second": 2.001, |
|
"eval_steps_per_second": 0.25, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.4234929279177067, |
|
"grad_norm": 0.10641132295131683, |
|
"learning_rate": 0.00016238, |
|
"loss": 0.0299, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 1.4250056727932834, |
|
"grad_norm": 0.051270656287670135, |
|
"learning_rate": 0.00016234, |
|
"loss": 0.0317, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.4265184176688601, |
|
"grad_norm": 0.07362283766269684, |
|
"learning_rate": 0.00016230000000000001, |
|
"loss": 0.0269, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 1.4280311625444368, |
|
"grad_norm": 0.060159552842378616, |
|
"learning_rate": 0.00016226, |
|
"loss": 0.0335, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.4295439074200136, |
|
"grad_norm": 0.08667318522930145, |
|
"learning_rate": 0.00016222000000000003, |
|
"loss": 0.0361, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.4310566522955903, |
|
"grad_norm": 0.06154588237404823, |
|
"learning_rate": 0.00016218, |
|
"loss": 0.0334, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.432569397171167, |
|
"grad_norm": 0.10563425719738007, |
|
"learning_rate": 0.00016214000000000002, |
|
"loss": 0.0362, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.4340821420467438, |
|
"grad_norm": 0.10325556248426437, |
|
"learning_rate": 0.0001621, |
|
"loss": 0.0343, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.4355948869223205, |
|
"grad_norm": 0.08902329206466675, |
|
"learning_rate": 0.00016206, |
|
"loss": 0.032, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 1.4371076317978972, |
|
"grad_norm": 0.07280543446540833, |
|
"learning_rate": 0.00016202000000000002, |
|
"loss": 0.0366, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.438620376673474, |
|
"grad_norm": 0.09071139991283417, |
|
"learning_rate": 0.00016198, |
|
"loss": 0.0299, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 1.4401331215490507, |
|
"grad_norm": 0.06658421456813812, |
|
"learning_rate": 0.00016194, |
|
"loss": 0.0281, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.4416458664246274, |
|
"grad_norm": 0.0793207511305809, |
|
"learning_rate": 0.0001619, |
|
"loss": 0.0292, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 1.4431586113002042, |
|
"grad_norm": 0.0829392522573471, |
|
"learning_rate": 0.00016186, |
|
"loss": 0.0337, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.444671356175781, |
|
"grad_norm": 0.061817191541194916, |
|
"learning_rate": 0.00016182000000000002, |
|
"loss": 0.0298, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.4461841010513576, |
|
"grad_norm": 0.09837779402732849, |
|
"learning_rate": 0.00016177999999999999, |
|
"loss": 0.037, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.4476968459269344, |
|
"grad_norm": 0.05777046084403992, |
|
"learning_rate": 0.00016174, |
|
"loss": 0.0339, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 1.449209590802511, |
|
"grad_norm": 0.07731931656599045, |
|
"learning_rate": 0.0001617, |
|
"loss": 0.0338, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.4507223356780878, |
|
"grad_norm": 0.08898504078388214, |
|
"learning_rate": 0.00016166000000000002, |
|
"loss": 0.0358, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 1.4522350805536646, |
|
"grad_norm": 0.0696534812450409, |
|
"learning_rate": 0.00016162000000000001, |
|
"loss": 0.0318, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.4522350805536646, |
|
"eval_cer": 0.08453906649568975, |
|
"eval_loss": 0.036363635212183, |
|
"eval_runtime": 10514.0599, |
|
"eval_samples_per_second": 2.002, |
|
"eval_steps_per_second": 0.25, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.4537478254292413, |
|
"grad_norm": 0.059242941439151764, |
|
"learning_rate": 0.00016158, |
|
"loss": 0.0313, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 1.455260570304818, |
|
"grad_norm": 0.0844852551817894, |
|
"learning_rate": 0.00016154, |
|
"loss": 0.034, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.4567733151803948, |
|
"grad_norm": 0.08737514168024063, |
|
"learning_rate": 0.0001615, |
|
"loss": 0.0314, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 1.4582860600559715, |
|
"grad_norm": 0.08028477430343628, |
|
"learning_rate": 0.00016146000000000002, |
|
"loss": 0.028, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.4597988049315482, |
|
"grad_norm": 0.08293917775154114, |
|
"learning_rate": 0.00016142, |
|
"loss": 0.0344, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.461311549807125, |
|
"grad_norm": 0.07055462896823883, |
|
"learning_rate": 0.00016138, |
|
"loss": 0.0329, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.4628242946827017, |
|
"grad_norm": 0.08431320637464523, |
|
"learning_rate": 0.00016134, |
|
"loss": 0.0313, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 1.4643370395582784, |
|
"grad_norm": 0.09756868332624435, |
|
"learning_rate": 0.00016130000000000002, |
|
"loss": 0.0305, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.4658497844338552, |
|
"grad_norm": 0.07265082001686096, |
|
"learning_rate": 0.00016126, |
|
"loss": 0.0333, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 1.467362529309432, |
|
"grad_norm": 0.09156455099582672, |
|
"learning_rate": 0.00016122, |
|
"loss": 0.0356, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.4688752741850086, |
|
"grad_norm": 0.06957582384347916, |
|
"learning_rate": 0.00016118, |
|
"loss": 0.0313, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 1.4703880190605854, |
|
"grad_norm": 0.06783420592546463, |
|
"learning_rate": 0.00016114, |
|
"loss": 0.0297, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.471900763936162, |
|
"grad_norm": 0.07193417102098465, |
|
"learning_rate": 0.0001611, |
|
"loss": 0.0302, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 1.4734135088117388, |
|
"grad_norm": 0.08238872140645981, |
|
"learning_rate": 0.00016106, |
|
"loss": 0.0335, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.4749262536873156, |
|
"grad_norm": 0.07197025418281555, |
|
"learning_rate": 0.00016102000000000003, |
|
"loss": 0.0369, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.4764389985628923, |
|
"grad_norm": 0.08109525591135025, |
|
"learning_rate": 0.00016098, |
|
"loss": 0.0327, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.477951743438469, |
|
"grad_norm": 0.12331151217222214, |
|
"learning_rate": 0.00016094000000000001, |
|
"loss": 0.0372, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 1.4794644883140458, |
|
"grad_norm": 0.08190298080444336, |
|
"learning_rate": 0.0001609, |
|
"loss": 0.0293, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.4809772331896225, |
|
"grad_norm": 0.05840008333325386, |
|
"learning_rate": 0.00016086, |
|
"loss": 0.0349, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 1.4824899780651992, |
|
"grad_norm": 0.07874023169279099, |
|
"learning_rate": 0.00016082000000000002, |
|
"loss": 0.0322, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.4824899780651992, |
|
"eval_cer": 0.24973192203254985, |
|
"eval_loss": 0.036100711673498154, |
|
"eval_runtime": 10381.657, |
|
"eval_samples_per_second": 2.028, |
|
"eval_steps_per_second": 0.254, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.484002722940776, |
|
"grad_norm": 0.0776941329240799, |
|
"learning_rate": 0.00016078, |
|
"loss": 0.0358, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 1.4855154678163527, |
|
"grad_norm": 0.12248267233371735, |
|
"learning_rate": 0.00016074, |
|
"loss": 0.0356, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.4870282126919294, |
|
"grad_norm": 0.08847146481275558, |
|
"learning_rate": 0.0001607, |
|
"loss": 0.0274, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 1.4885409575675062, |
|
"grad_norm": 0.0689850002527237, |
|
"learning_rate": 0.00016066000000000002, |
|
"loss": 0.0266, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.4900537024430829, |
|
"grad_norm": 0.06342552602291107, |
|
"learning_rate": 0.00016062000000000002, |
|
"loss": 0.031, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.4915664473186596, |
|
"grad_norm": 0.11846140772104263, |
|
"learning_rate": 0.00016057999999999998, |
|
"loss": 0.0348, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.4930791921942363, |
|
"grad_norm": 0.07698410004377365, |
|
"learning_rate": 0.00016054, |
|
"loss": 0.0259, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 1.494591937069813, |
|
"grad_norm": 0.11177106946706772, |
|
"learning_rate": 0.0001605, |
|
"loss": 0.0301, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.4961046819453898, |
|
"grad_norm": 0.09459209442138672, |
|
"learning_rate": 0.00016046000000000002, |
|
"loss": 0.0349, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 1.4976174268209665, |
|
"grad_norm": 0.08800119906663895, |
|
"learning_rate": 0.00016042, |
|
"loss": 0.0335, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.4991301716965433, |
|
"grad_norm": 0.09330447763204575, |
|
"learning_rate": 0.00016038, |
|
"loss": 0.0326, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 1.50064291657212, |
|
"grad_norm": 0.10210063308477402, |
|
"learning_rate": 0.00016034, |
|
"loss": 0.035, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.5021556614476967, |
|
"grad_norm": 0.11886809766292572, |
|
"learning_rate": 0.0001603, |
|
"loss": 0.036, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 1.5036684063232735, |
|
"grad_norm": 0.07646410167217255, |
|
"learning_rate": 0.00016026000000000001, |
|
"loss": 0.0269, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.5051811511988502, |
|
"grad_norm": 0.09994587302207947, |
|
"learning_rate": 0.00016022, |
|
"loss": 0.0298, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.506693896074427, |
|
"grad_norm": 0.0781632736325264, |
|
"learning_rate": 0.00016018, |
|
"loss": 0.0299, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.5082066409500037, |
|
"grad_norm": 0.09286709874868393, |
|
"learning_rate": 0.00016014, |
|
"loss": 0.0334, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 1.5097193858255804, |
|
"grad_norm": 0.08658807724714279, |
|
"learning_rate": 0.00016010000000000002, |
|
"loss": 0.032, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.5112321307011571, |
|
"grad_norm": 0.09535326808691025, |
|
"learning_rate": 0.00016006, |
|
"loss": 0.032, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 1.5127448755767339, |
|
"grad_norm": 0.056372299790382385, |
|
"learning_rate": 0.00016002, |
|
"loss": 0.033, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.5127448755767339, |
|
"eval_cer": 0.1808933296766016, |
|
"eval_loss": 0.03580623120069504, |
|
"eval_runtime": 10388.4948, |
|
"eval_samples_per_second": 2.026, |
|
"eval_steps_per_second": 0.253, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 50000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.622822387689695e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|