hadrakey's picture
Training in progress, step 1000
e06b649 verified
raw
history blame
182 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.5127448755767339,
"eval_steps": 200,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001512744875576734,
"grad_norm": 0.8282566070556641,
"learning_rate": 0.00019996,
"loss": 3.4576,
"step": 10
},
{
"epoch": 0.003025489751153468,
"grad_norm": 0.1628154069185257,
"learning_rate": 0.00019992000000000002,
"loss": 0.0992,
"step": 20
},
{
"epoch": 0.004538234626730202,
"grad_norm": 0.17421123385429382,
"learning_rate": 0.00019988,
"loss": 0.0666,
"step": 30
},
{
"epoch": 0.006050979502306936,
"grad_norm": 0.08850277960300446,
"learning_rate": 0.00019984,
"loss": 0.0661,
"step": 40
},
{
"epoch": 0.00756372437788367,
"grad_norm": 0.11368270963430405,
"learning_rate": 0.0001998,
"loss": 0.0639,
"step": 50
},
{
"epoch": 0.009076469253460404,
"grad_norm": 0.12990300357341766,
"learning_rate": 0.00019976000000000003,
"loss": 0.0617,
"step": 60
},
{
"epoch": 0.010589214129037138,
"grad_norm": 0.08885369449853897,
"learning_rate": 0.00019972000000000002,
"loss": 0.0643,
"step": 70
},
{
"epoch": 0.012101959004613872,
"grad_norm": 0.07073435187339783,
"learning_rate": 0.00019968,
"loss": 0.0629,
"step": 80
},
{
"epoch": 0.013614703880190605,
"grad_norm": 0.061856113374233246,
"learning_rate": 0.00019964,
"loss": 0.061,
"step": 90
},
{
"epoch": 0.01512744875576734,
"grad_norm": 0.06827201694250107,
"learning_rate": 0.0001996,
"loss": 0.0586,
"step": 100
},
{
"epoch": 0.016640193631344075,
"grad_norm": 0.07220456004142761,
"learning_rate": 0.00019956000000000002,
"loss": 0.055,
"step": 110
},
{
"epoch": 0.018152938506920808,
"grad_norm": 0.06632555276155472,
"learning_rate": 0.00019952000000000001,
"loss": 0.0586,
"step": 120
},
{
"epoch": 0.01966568338249754,
"grad_norm": 0.09966724365949631,
"learning_rate": 0.00019948,
"loss": 0.0621,
"step": 130
},
{
"epoch": 0.021178428258074276,
"grad_norm": 0.0833888053894043,
"learning_rate": 0.00019944,
"loss": 0.0591,
"step": 140
},
{
"epoch": 0.02269117313365101,
"grad_norm": 0.08170727640390396,
"learning_rate": 0.00019940000000000002,
"loss": 0.055,
"step": 150
},
{
"epoch": 0.024203918009227745,
"grad_norm": 0.07089231163263321,
"learning_rate": 0.00019936000000000002,
"loss": 0.0582,
"step": 160
},
{
"epoch": 0.025716662884804477,
"grad_norm": 0.09390200674533844,
"learning_rate": 0.00019932,
"loss": 0.0628,
"step": 170
},
{
"epoch": 0.02722940776038121,
"grad_norm": 0.06722863018512726,
"learning_rate": 0.00019928,
"loss": 0.0591,
"step": 180
},
{
"epoch": 0.028742152635957946,
"grad_norm": 0.0743609368801117,
"learning_rate": 0.00019924,
"loss": 0.0626,
"step": 190
},
{
"epoch": 0.03025489751153468,
"grad_norm": 0.08125407248735428,
"learning_rate": 0.00019920000000000002,
"loss": 0.0601,
"step": 200
},
{
"epoch": 0.03025489751153468,
"eval_cer": 0.5356160728183765,
"eval_loss": 0.05078176036477089,
"eval_runtime": 10281.8657,
"eval_samples_per_second": 2.047,
"eval_steps_per_second": 0.256,
"step": 200
},
{
"epoch": 0.03176764238711141,
"grad_norm": 0.07030890136957169,
"learning_rate": 0.00019916,
"loss": 0.0597,
"step": 210
},
{
"epoch": 0.03328038726268815,
"grad_norm": 0.05290469154715538,
"learning_rate": 0.00019912,
"loss": 0.0587,
"step": 220
},
{
"epoch": 0.03479313213826488,
"grad_norm": 0.07339277863502502,
"learning_rate": 0.00019908,
"loss": 0.0529,
"step": 230
},
{
"epoch": 0.036305877013841616,
"grad_norm": 0.0727711170911789,
"learning_rate": 0.00019904,
"loss": 0.0539,
"step": 240
},
{
"epoch": 0.03781862188941835,
"grad_norm": 0.07383541762828827,
"learning_rate": 0.000199,
"loss": 0.0532,
"step": 250
},
{
"epoch": 0.03933136676499508,
"grad_norm": 0.07042526453733444,
"learning_rate": 0.00019896,
"loss": 0.0571,
"step": 260
},
{
"epoch": 0.04084411164057182,
"grad_norm": 0.08188482373952866,
"learning_rate": 0.00019892000000000003,
"loss": 0.0521,
"step": 270
},
{
"epoch": 0.04235685651614855,
"grad_norm": 0.07334589958190918,
"learning_rate": 0.00019888,
"loss": 0.0532,
"step": 280
},
{
"epoch": 0.043869601391725285,
"grad_norm": 0.06326377391815186,
"learning_rate": 0.00019884000000000001,
"loss": 0.0528,
"step": 290
},
{
"epoch": 0.04538234626730202,
"grad_norm": 0.05303795263171196,
"learning_rate": 0.0001988,
"loss": 0.0539,
"step": 300
},
{
"epoch": 0.04689509114287875,
"grad_norm": 0.058723289519548416,
"learning_rate": 0.00019876,
"loss": 0.0469,
"step": 310
},
{
"epoch": 0.04840783601845549,
"grad_norm": 0.08683237433433533,
"learning_rate": 0.00019872000000000002,
"loss": 0.0601,
"step": 320
},
{
"epoch": 0.04992058089403222,
"grad_norm": 0.07650341093540192,
"learning_rate": 0.00019868,
"loss": 0.0582,
"step": 330
},
{
"epoch": 0.051433325769608955,
"grad_norm": 0.054965659976005554,
"learning_rate": 0.00019864,
"loss": 0.0548,
"step": 340
},
{
"epoch": 0.05294607064518569,
"grad_norm": 0.06949716061353683,
"learning_rate": 0.0001986,
"loss": 0.0581,
"step": 350
},
{
"epoch": 0.05445881552076242,
"grad_norm": 0.10514732450246811,
"learning_rate": 0.00019856000000000002,
"loss": 0.0587,
"step": 360
},
{
"epoch": 0.05597156039633916,
"grad_norm": 0.06586117297410965,
"learning_rate": 0.00019852000000000002,
"loss": 0.0561,
"step": 370
},
{
"epoch": 0.05748430527191589,
"grad_norm": 0.09821395576000214,
"learning_rate": 0.00019848,
"loss": 0.0556,
"step": 380
},
{
"epoch": 0.058997050147492625,
"grad_norm": 0.06488014757633209,
"learning_rate": 0.00019844,
"loss": 0.0634,
"step": 390
},
{
"epoch": 0.06050979502306936,
"grad_norm": 0.06910958141088486,
"learning_rate": 0.0001984,
"loss": 0.052,
"step": 400
},
{
"epoch": 0.06050979502306936,
"eval_cer": 0.2714758865721352,
"eval_loss": 0.04847713187336922,
"eval_runtime": 10484.76,
"eval_samples_per_second": 2.008,
"eval_steps_per_second": 0.251,
"step": 400
},
{
"epoch": 0.0620225398986461,
"grad_norm": 0.048563435673713684,
"learning_rate": 0.00019836000000000002,
"loss": 0.0565,
"step": 410
},
{
"epoch": 0.06353528477422282,
"grad_norm": 0.055841896682977676,
"learning_rate": 0.00019832,
"loss": 0.0547,
"step": 420
},
{
"epoch": 0.06504802964979955,
"grad_norm": 0.05644605681300163,
"learning_rate": 0.00019828,
"loss": 0.0575,
"step": 430
},
{
"epoch": 0.0665607745253763,
"grad_norm": 0.05617703124880791,
"learning_rate": 0.00019824,
"loss": 0.0514,
"step": 440
},
{
"epoch": 0.06807351940095303,
"grad_norm": 0.11480820178985596,
"learning_rate": 0.00019820000000000002,
"loss": 0.0562,
"step": 450
},
{
"epoch": 0.06958626427652977,
"grad_norm": 0.06004955247044563,
"learning_rate": 0.00019816000000000001,
"loss": 0.0575,
"step": 460
},
{
"epoch": 0.0710990091521065,
"grad_norm": 0.07830873131752014,
"learning_rate": 0.00019812,
"loss": 0.0621,
"step": 470
},
{
"epoch": 0.07261175402768323,
"grad_norm": 0.052650969475507736,
"learning_rate": 0.00019808,
"loss": 0.0599,
"step": 480
},
{
"epoch": 0.07412449890325996,
"grad_norm": 0.09298545122146606,
"learning_rate": 0.00019804,
"loss": 0.0559,
"step": 490
},
{
"epoch": 0.0756372437788367,
"grad_norm": 0.06198689714074135,
"learning_rate": 0.00019800000000000002,
"loss": 0.047,
"step": 500
},
{
"epoch": 0.07714998865441343,
"grad_norm": 0.06688915193080902,
"learning_rate": 0.00019796,
"loss": 0.0523,
"step": 510
},
{
"epoch": 0.07866273352999016,
"grad_norm": 0.06676903367042542,
"learning_rate": 0.00019792000000000003,
"loss": 0.0509,
"step": 520
},
{
"epoch": 0.08017547840556691,
"grad_norm": 0.06219707056879997,
"learning_rate": 0.00019788,
"loss": 0.0553,
"step": 530
},
{
"epoch": 0.08168822328114364,
"grad_norm": 0.07905440032482147,
"learning_rate": 0.00019784,
"loss": 0.0506,
"step": 540
},
{
"epoch": 0.08320096815672037,
"grad_norm": 0.08591905236244202,
"learning_rate": 0.0001978,
"loss": 0.0603,
"step": 550
},
{
"epoch": 0.0847137130322971,
"grad_norm": 0.05921874940395355,
"learning_rate": 0.00019776,
"loss": 0.0562,
"step": 560
},
{
"epoch": 0.08622645790787384,
"grad_norm": 0.058868613094091415,
"learning_rate": 0.00019772000000000002,
"loss": 0.0517,
"step": 570
},
{
"epoch": 0.08773920278345057,
"grad_norm": 0.06818246096372604,
"learning_rate": 0.00019768,
"loss": 0.0478,
"step": 580
},
{
"epoch": 0.0892519476590273,
"grad_norm": 0.07364825904369354,
"learning_rate": 0.00019764,
"loss": 0.0553,
"step": 590
},
{
"epoch": 0.09076469253460404,
"grad_norm": 0.07647281885147095,
"learning_rate": 0.0001976,
"loss": 0.0527,
"step": 600
},
{
"epoch": 0.09076469253460404,
"eval_cer": 0.282631389088609,
"eval_loss": 0.047340717166662216,
"eval_runtime": 10466.4392,
"eval_samples_per_second": 2.011,
"eval_steps_per_second": 0.251,
"step": 600
},
{
"epoch": 0.09227743741018077,
"grad_norm": 0.0819125548005104,
"learning_rate": 0.00019756,
"loss": 0.0509,
"step": 610
},
{
"epoch": 0.0937901822857575,
"grad_norm": 0.06566735357046127,
"learning_rate": 0.00019752000000000002,
"loss": 0.0583,
"step": 620
},
{
"epoch": 0.09530292716133425,
"grad_norm": 0.06856215745210648,
"learning_rate": 0.00019748,
"loss": 0.0465,
"step": 630
},
{
"epoch": 0.09681567203691098,
"grad_norm": 0.06130633130669594,
"learning_rate": 0.00019744,
"loss": 0.0509,
"step": 640
},
{
"epoch": 0.09832841691248771,
"grad_norm": 0.08208902925252914,
"learning_rate": 0.0001974,
"loss": 0.0549,
"step": 650
},
{
"epoch": 0.09984116178806444,
"grad_norm": 0.08106379210948944,
"learning_rate": 0.00019736000000000002,
"loss": 0.0584,
"step": 660
},
{
"epoch": 0.10135390666364118,
"grad_norm": 0.08364614844322205,
"learning_rate": 0.00019732000000000001,
"loss": 0.0543,
"step": 670
},
{
"epoch": 0.10286665153921791,
"grad_norm": 0.06432674080133438,
"learning_rate": 0.00019728,
"loss": 0.0535,
"step": 680
},
{
"epoch": 0.10437939641479464,
"grad_norm": 0.07217614352703094,
"learning_rate": 0.00019724,
"loss": 0.0521,
"step": 690
},
{
"epoch": 0.10589214129037137,
"grad_norm": 0.06074230372905731,
"learning_rate": 0.0001972,
"loss": 0.0545,
"step": 700
},
{
"epoch": 0.10740488616594811,
"grad_norm": 0.04888018220663071,
"learning_rate": 0.00019716000000000002,
"loss": 0.0445,
"step": 710
},
{
"epoch": 0.10891763104152484,
"grad_norm": 0.07705683261156082,
"learning_rate": 0.00019712,
"loss": 0.0491,
"step": 720
},
{
"epoch": 0.11043037591710159,
"grad_norm": 0.06741231679916382,
"learning_rate": 0.00019708000000000003,
"loss": 0.053,
"step": 730
},
{
"epoch": 0.11194312079267832,
"grad_norm": 0.0673738569021225,
"learning_rate": 0.00019704,
"loss": 0.0473,
"step": 740
},
{
"epoch": 0.11345586566825505,
"grad_norm": 0.06236235797405243,
"learning_rate": 0.00019700000000000002,
"loss": 0.0538,
"step": 750
},
{
"epoch": 0.11496861054383178,
"grad_norm": 0.0538531057536602,
"learning_rate": 0.00019696,
"loss": 0.0414,
"step": 760
},
{
"epoch": 0.11648135541940852,
"grad_norm": 0.09818791598081589,
"learning_rate": 0.00019692,
"loss": 0.0551,
"step": 770
},
{
"epoch": 0.11799410029498525,
"grad_norm": 0.06459952145814896,
"learning_rate": 0.00019688000000000003,
"loss": 0.0543,
"step": 780
},
{
"epoch": 0.11950684517056198,
"grad_norm": 0.09495878219604492,
"learning_rate": 0.00019684,
"loss": 0.0566,
"step": 790
},
{
"epoch": 0.12101959004613871,
"grad_norm": 0.06249309703707695,
"learning_rate": 0.0001968,
"loss": 0.0492,
"step": 800
},
{
"epoch": 0.12101959004613871,
"eval_cer": 0.0030890735373690806,
"eval_loss": 0.046879783272743225,
"eval_runtime": 10443.0859,
"eval_samples_per_second": 2.016,
"eval_steps_per_second": 0.252,
"step": 800
},
{
"epoch": 0.12253233492171545,
"grad_norm": 0.06483816355466843,
"learning_rate": 0.00019676,
"loss": 0.048,
"step": 810
},
{
"epoch": 0.1240450797972922,
"grad_norm": 0.05618014931678772,
"learning_rate": 0.00019672000000000003,
"loss": 0.0484,
"step": 820
},
{
"epoch": 0.1255578246728689,
"grad_norm": 0.07441507279872894,
"learning_rate": 0.00019668000000000002,
"loss": 0.0548,
"step": 830
},
{
"epoch": 0.12707056954844564,
"grad_norm": 0.05274181067943573,
"learning_rate": 0.00019664000000000001,
"loss": 0.0619,
"step": 840
},
{
"epoch": 0.12858331442402238,
"grad_norm": 0.06264190375804901,
"learning_rate": 0.0001966,
"loss": 0.0525,
"step": 850
},
{
"epoch": 0.1300960592995991,
"grad_norm": 0.07662319391965866,
"learning_rate": 0.00019656,
"loss": 0.0532,
"step": 860
},
{
"epoch": 0.13160880417517587,
"grad_norm": 0.06203316152095795,
"learning_rate": 0.00019652000000000002,
"loss": 0.0525,
"step": 870
},
{
"epoch": 0.1331215490507526,
"grad_norm": 0.1326906681060791,
"learning_rate": 0.00019648000000000002,
"loss": 0.0539,
"step": 880
},
{
"epoch": 0.13463429392632933,
"grad_norm": 0.10350421816110611,
"learning_rate": 0.00019644,
"loss": 0.0556,
"step": 890
},
{
"epoch": 0.13614703880190607,
"grad_norm": 0.049543242901563644,
"learning_rate": 0.0001964,
"loss": 0.0482,
"step": 900
},
{
"epoch": 0.1376597836774828,
"grad_norm": 0.11776097118854523,
"learning_rate": 0.00019636000000000002,
"loss": 0.0538,
"step": 910
},
{
"epoch": 0.13917252855305953,
"grad_norm": 0.05535553768277168,
"learning_rate": 0.00019632000000000002,
"loss": 0.052,
"step": 920
},
{
"epoch": 0.14068527342863626,
"grad_norm": 0.05945896357297897,
"learning_rate": 0.00019628,
"loss": 0.0491,
"step": 930
},
{
"epoch": 0.142198018304213,
"grad_norm": 0.1228972002863884,
"learning_rate": 0.00019624,
"loss": 0.0511,
"step": 940
},
{
"epoch": 0.14371076317978973,
"grad_norm": 0.08868791162967682,
"learning_rate": 0.0001962,
"loss": 0.057,
"step": 950
},
{
"epoch": 0.14522350805536646,
"grad_norm": 0.07960449159145355,
"learning_rate": 0.00019616000000000002,
"loss": 0.0514,
"step": 960
},
{
"epoch": 0.1467362529309432,
"grad_norm": 0.06392108649015427,
"learning_rate": 0.00019612,
"loss": 0.0558,
"step": 970
},
{
"epoch": 0.14824899780651993,
"grad_norm": 0.07048727571964264,
"learning_rate": 0.00019608,
"loss": 0.053,
"step": 980
},
{
"epoch": 0.14976174268209666,
"grad_norm": 0.10491488873958588,
"learning_rate": 0.00019604,
"loss": 0.0489,
"step": 990
},
{
"epoch": 0.1512744875576734,
"grad_norm": 0.059835776686668396,
"learning_rate": 0.000196,
"loss": 0.0474,
"step": 1000
},
{
"epoch": 0.1512744875576734,
"eval_cer": 0.4367181574025345,
"eval_loss": 0.04569260776042938,
"eval_runtime": 10457.5718,
"eval_samples_per_second": 2.013,
"eval_steps_per_second": 0.252,
"step": 1000
},
{
"epoch": 0.15278723243325013,
"grad_norm": 0.07570289075374603,
"learning_rate": 0.00019596000000000001,
"loss": 0.0522,
"step": 1010
},
{
"epoch": 0.15429997730882686,
"grad_norm": 0.09082864969968796,
"learning_rate": 0.00019592,
"loss": 0.0516,
"step": 1020
},
{
"epoch": 0.1558127221844036,
"grad_norm": 0.06894449889659882,
"learning_rate": 0.00019588000000000003,
"loss": 0.0489,
"step": 1030
},
{
"epoch": 0.15732546705998032,
"grad_norm": 0.05989064276218414,
"learning_rate": 0.00019584,
"loss": 0.0514,
"step": 1040
},
{
"epoch": 0.15883821193555706,
"grad_norm": 0.060047443956136703,
"learning_rate": 0.00019580000000000002,
"loss": 0.047,
"step": 1050
},
{
"epoch": 0.16035095681113382,
"grad_norm": 0.06459174305200577,
"learning_rate": 0.00019576,
"loss": 0.0532,
"step": 1060
},
{
"epoch": 0.16186370168671055,
"grad_norm": 0.061583805829286575,
"learning_rate": 0.00019572,
"loss": 0.0485,
"step": 1070
},
{
"epoch": 0.16337644656228728,
"grad_norm": 0.060534268617630005,
"learning_rate": 0.00019568000000000002,
"loss": 0.0468,
"step": 1080
},
{
"epoch": 0.164889191437864,
"grad_norm": 0.06731607764959335,
"learning_rate": 0.00019564,
"loss": 0.0481,
"step": 1090
},
{
"epoch": 0.16640193631344075,
"grad_norm": 0.0757998675107956,
"learning_rate": 0.0001956,
"loss": 0.056,
"step": 1100
},
{
"epoch": 0.16791468118901748,
"grad_norm": 0.08009450882673264,
"learning_rate": 0.00019556,
"loss": 0.0523,
"step": 1110
},
{
"epoch": 0.1694274260645942,
"grad_norm": 2.663090944290161,
"learning_rate": 0.00019552000000000003,
"loss": 0.1404,
"step": 1120
},
{
"epoch": 0.17094017094017094,
"grad_norm": 14.877944946289062,
"learning_rate": 0.00019548000000000002,
"loss": 0.1442,
"step": 1130
},
{
"epoch": 0.17245291581574768,
"grad_norm": 2.8173887729644775,
"learning_rate": 0.000195448,
"loss": 0.5461,
"step": 1140
},
{
"epoch": 0.1739656606913244,
"grad_norm": 9.367515563964844,
"learning_rate": 0.00019540800000000002,
"loss": 0.2832,
"step": 1150
},
{
"epoch": 0.17547840556690114,
"grad_norm": 0.34991636872291565,
"learning_rate": 0.00019536800000000002,
"loss": 0.1497,
"step": 1160
},
{
"epoch": 0.17699115044247787,
"grad_norm": 0.10464385151863098,
"learning_rate": 0.000195328,
"loss": 0.0686,
"step": 1170
},
{
"epoch": 0.1785038953180546,
"grad_norm": 0.8961012363433838,
"learning_rate": 0.000195288,
"loss": 0.0822,
"step": 1180
},
{
"epoch": 0.18001664019363134,
"grad_norm": 8.467473983764648,
"learning_rate": 0.000195248,
"loss": 0.0949,
"step": 1190
},
{
"epoch": 0.18152938506920807,
"grad_norm": 0.08059060573577881,
"learning_rate": 0.00019520800000000002,
"loss": 0.0552,
"step": 1200
},
{
"epoch": 0.18152938506920807,
"eval_cer": 0.0833932493767496,
"eval_loss": 0.04637393727898598,
"eval_runtime": 10459.5021,
"eval_samples_per_second": 2.013,
"eval_steps_per_second": 0.252,
"step": 1200
},
{
"epoch": 0.1830421299447848,
"grad_norm": 0.08795847743749619,
"learning_rate": 0.000195168,
"loss": 0.055,
"step": 1210
},
{
"epoch": 0.18455487482036154,
"grad_norm": 0.10272721946239471,
"learning_rate": 0.000195128,
"loss": 0.0557,
"step": 1220
},
{
"epoch": 0.18606761969593827,
"grad_norm": 0.23404774069786072,
"learning_rate": 0.000195088,
"loss": 0.0611,
"step": 1230
},
{
"epoch": 0.187580364571515,
"grad_norm": 0.2968621253967285,
"learning_rate": 0.00019504800000000002,
"loss": 0.0817,
"step": 1240
},
{
"epoch": 0.18909310944709176,
"grad_norm": 0.08634278923273087,
"learning_rate": 0.00019500800000000001,
"loss": 0.0685,
"step": 1250
},
{
"epoch": 0.1906058543226685,
"grad_norm": 0.11241244524717331,
"learning_rate": 0.000194968,
"loss": 0.0563,
"step": 1260
},
{
"epoch": 0.19211859919824523,
"grad_norm": 0.17380298674106598,
"learning_rate": 0.000194928,
"loss": 0.065,
"step": 1270
},
{
"epoch": 0.19363134407382196,
"grad_norm": 0.13615791499614716,
"learning_rate": 0.000194888,
"loss": 0.0667,
"step": 1280
},
{
"epoch": 0.1951440889493987,
"grad_norm": 0.0854301005601883,
"learning_rate": 0.00019484800000000002,
"loss": 0.0507,
"step": 1290
},
{
"epoch": 0.19665683382497542,
"grad_norm": 0.08915933966636658,
"learning_rate": 0.000194808,
"loss": 0.0561,
"step": 1300
},
{
"epoch": 0.19816957870055216,
"grad_norm": 0.09583040326833725,
"learning_rate": 0.00019476800000000003,
"loss": 0.0514,
"step": 1310
},
{
"epoch": 0.1996823235761289,
"grad_norm": 0.09624961763620377,
"learning_rate": 0.000194728,
"loss": 0.052,
"step": 1320
},
{
"epoch": 0.20119506845170562,
"grad_norm": 0.05612370744347572,
"learning_rate": 0.00019468800000000002,
"loss": 0.0471,
"step": 1330
},
{
"epoch": 0.20270781332728235,
"grad_norm": 0.0653730109333992,
"learning_rate": 0.000194648,
"loss": 0.0521,
"step": 1340
},
{
"epoch": 0.2042205582028591,
"grad_norm": 0.07432978600263596,
"learning_rate": 0.000194608,
"loss": 0.0577,
"step": 1350
},
{
"epoch": 0.20573330307843582,
"grad_norm": 0.05863150209188461,
"learning_rate": 0.00019456800000000003,
"loss": 0.0435,
"step": 1360
},
{
"epoch": 0.20724604795401255,
"grad_norm": 0.056969739496707916,
"learning_rate": 0.000194528,
"loss": 0.0502,
"step": 1370
},
{
"epoch": 0.20875879282958928,
"grad_norm": 0.10658754408359528,
"learning_rate": 0.000194488,
"loss": 0.0469,
"step": 1380
},
{
"epoch": 0.21027153770516602,
"grad_norm": 0.06535681337118149,
"learning_rate": 0.000194448,
"loss": 0.0519,
"step": 1390
},
{
"epoch": 0.21178428258074275,
"grad_norm": 0.08987314254045486,
"learning_rate": 0.000194408,
"loss": 0.0482,
"step": 1400
},
{
"epoch": 0.21178428258074275,
"eval_cer": 0.14607469615771385,
"eval_loss": 0.04351452365517616,
"eval_runtime": 10473.9712,
"eval_samples_per_second": 2.01,
"eval_steps_per_second": 0.251,
"step": 1400
},
{
"epoch": 0.21329702745631948,
"grad_norm": 0.09238473325967789,
"learning_rate": 0.00019436800000000002,
"loss": 0.0483,
"step": 1410
},
{
"epoch": 0.21480977233189621,
"grad_norm": 0.10443761199712753,
"learning_rate": 0.000194328,
"loss": 0.054,
"step": 1420
},
{
"epoch": 0.21632251720747295,
"grad_norm": 0.0742131844162941,
"learning_rate": 0.000194288,
"loss": 0.0507,
"step": 1430
},
{
"epoch": 0.21783526208304968,
"grad_norm": 0.09358492493629456,
"learning_rate": 0.000194248,
"loss": 0.0496,
"step": 1440
},
{
"epoch": 0.21934800695862644,
"grad_norm": 0.07695715129375458,
"learning_rate": 0.00019420800000000002,
"loss": 0.046,
"step": 1450
},
{
"epoch": 0.22086075183420317,
"grad_norm": 0.07772234827280045,
"learning_rate": 0.00019416800000000002,
"loss": 0.0468,
"step": 1460
},
{
"epoch": 0.2223734967097799,
"grad_norm": 0.04500894993543625,
"learning_rate": 0.000194128,
"loss": 0.0428,
"step": 1470
},
{
"epoch": 0.22388624158535664,
"grad_norm": 0.08258084207773209,
"learning_rate": 0.000194088,
"loss": 0.0542,
"step": 1480
},
{
"epoch": 0.22539898646093337,
"grad_norm": 0.06530752032995224,
"learning_rate": 0.000194048,
"loss": 0.0477,
"step": 1490
},
{
"epoch": 0.2269117313365101,
"grad_norm": 0.06770725548267365,
"learning_rate": 0.00019400800000000002,
"loss": 0.052,
"step": 1500
},
{
"epoch": 0.22842447621208684,
"grad_norm": 0.04499737173318863,
"learning_rate": 0.000193968,
"loss": 0.0392,
"step": 1510
},
{
"epoch": 0.22993722108766357,
"grad_norm": 0.0594199039041996,
"learning_rate": 0.000193928,
"loss": 0.0469,
"step": 1520
},
{
"epoch": 0.2314499659632403,
"grad_norm": 0.05143499746918678,
"learning_rate": 0.000193888,
"loss": 0.0384,
"step": 1530
},
{
"epoch": 0.23296271083881703,
"grad_norm": 0.05464276298880577,
"learning_rate": 0.00019384800000000002,
"loss": 0.0479,
"step": 1540
},
{
"epoch": 0.23447545571439377,
"grad_norm": 0.0698809027671814,
"learning_rate": 0.000193808,
"loss": 0.0493,
"step": 1550
},
{
"epoch": 0.2359882005899705,
"grad_norm": 0.059237249195575714,
"learning_rate": 0.000193768,
"loss": 0.0493,
"step": 1560
},
{
"epoch": 0.23750094546554723,
"grad_norm": 0.08654357492923737,
"learning_rate": 0.000193728,
"loss": 0.0481,
"step": 1570
},
{
"epoch": 0.23901369034112396,
"grad_norm": 0.19063305854797363,
"learning_rate": 0.000193688,
"loss": 0.051,
"step": 1580
},
{
"epoch": 0.2405264352167007,
"grad_norm": 0.08095410466194153,
"learning_rate": 0.000193648,
"loss": 0.0447,
"step": 1590
},
{
"epoch": 0.24203918009227743,
"grad_norm": 0.056007932871580124,
"learning_rate": 0.000193608,
"loss": 0.0431,
"step": 1600
},
{
"epoch": 0.24203918009227743,
"eval_cer": 0.1667197881072213,
"eval_loss": 0.04373455420136452,
"eval_runtime": 10595.1515,
"eval_samples_per_second": 1.987,
"eval_steps_per_second": 0.248,
"step": 1600
},
{
"epoch": 0.24355192496785416,
"grad_norm": 0.06981740891933441,
"learning_rate": 0.00019356800000000003,
"loss": 0.0442,
"step": 1610
},
{
"epoch": 0.2450646698434309,
"grad_norm": 0.10189545899629593,
"learning_rate": 0.000193528,
"loss": 0.0477,
"step": 1620
},
{
"epoch": 0.24657741471900763,
"grad_norm": 0.06565351039171219,
"learning_rate": 0.00019348800000000002,
"loss": 0.0532,
"step": 1630
},
{
"epoch": 0.2480901595945844,
"grad_norm": 0.06872796267271042,
"learning_rate": 0.000193448,
"loss": 0.0472,
"step": 1640
},
{
"epoch": 0.24960290447016112,
"grad_norm": 0.06040889397263527,
"learning_rate": 0.000193408,
"loss": 0.0463,
"step": 1650
},
{
"epoch": 0.2511156493457378,
"grad_norm": 0.08789139986038208,
"learning_rate": 0.00019336800000000002,
"loss": 0.0495,
"step": 1660
},
{
"epoch": 0.25262839422131456,
"grad_norm": 0.0869157686829567,
"learning_rate": 0.00019332800000000002,
"loss": 0.0491,
"step": 1670
},
{
"epoch": 0.2541411390968913,
"grad_norm": 0.06886725127696991,
"learning_rate": 0.000193288,
"loss": 0.0508,
"step": 1680
},
{
"epoch": 0.255653883972468,
"grad_norm": 0.06138046458363533,
"learning_rate": 0.000193248,
"loss": 0.0435,
"step": 1690
},
{
"epoch": 0.25716662884804475,
"grad_norm": 0.05554139241576195,
"learning_rate": 0.00019320800000000002,
"loss": 0.0483,
"step": 1700
},
{
"epoch": 0.2586793737236215,
"grad_norm": 0.06712419539690018,
"learning_rate": 0.00019316800000000002,
"loss": 0.0545,
"step": 1710
},
{
"epoch": 0.2601921185991982,
"grad_norm": 0.07289120554924011,
"learning_rate": 0.000193128,
"loss": 0.0481,
"step": 1720
},
{
"epoch": 0.261704863474775,
"grad_norm": 0.07003842294216156,
"learning_rate": 0.000193088,
"loss": 0.0493,
"step": 1730
},
{
"epoch": 0.26321760835035174,
"grad_norm": 0.06333723664283752,
"learning_rate": 0.000193048,
"loss": 0.0536,
"step": 1740
},
{
"epoch": 0.26473035322592847,
"grad_norm": 0.0609460324048996,
"learning_rate": 0.00019300800000000002,
"loss": 0.0516,
"step": 1750
},
{
"epoch": 0.2662430981015052,
"grad_norm": 0.14176234602928162,
"learning_rate": 0.000192968,
"loss": 0.0522,
"step": 1760
},
{
"epoch": 0.26775584297708194,
"grad_norm": 0.09526730328798294,
"learning_rate": 0.000192928,
"loss": 0.0468,
"step": 1770
},
{
"epoch": 0.26926858785265867,
"grad_norm": 0.05794398859143257,
"learning_rate": 0.000192888,
"loss": 0.051,
"step": 1780
},
{
"epoch": 0.2707813327282354,
"grad_norm": 0.07408788055181503,
"learning_rate": 0.000192848,
"loss": 0.0482,
"step": 1790
},
{
"epoch": 0.27229407760381213,
"grad_norm": 0.07873456180095673,
"learning_rate": 0.00019280800000000001,
"loss": 0.0576,
"step": 1800
},
{
"epoch": 0.27229407760381213,
"eval_cer": 0.28151275038111545,
"eval_loss": 0.042666129767894745,
"eval_runtime": 10460.0372,
"eval_samples_per_second": 2.013,
"eval_steps_per_second": 0.252,
"step": 1800
},
{
"epoch": 0.27380682247938887,
"grad_norm": 0.06786733120679855,
"learning_rate": 0.000192768,
"loss": 0.0505,
"step": 1810
},
{
"epoch": 0.2753195673549656,
"grad_norm": 0.090096116065979,
"learning_rate": 0.00019272800000000003,
"loss": 0.0458,
"step": 1820
},
{
"epoch": 0.27683231223054233,
"grad_norm": 0.058033574372529984,
"learning_rate": 0.000192688,
"loss": 0.0415,
"step": 1830
},
{
"epoch": 0.27834505710611906,
"grad_norm": 0.09522871673107147,
"learning_rate": 0.00019264800000000002,
"loss": 0.0456,
"step": 1840
},
{
"epoch": 0.2798578019816958,
"grad_norm": 0.06533698737621307,
"learning_rate": 0.000192608,
"loss": 0.045,
"step": 1850
},
{
"epoch": 0.28137054685727253,
"grad_norm": 0.07162319868803024,
"learning_rate": 0.000192568,
"loss": 0.0511,
"step": 1860
},
{
"epoch": 0.28288329173284926,
"grad_norm": 0.06015852093696594,
"learning_rate": 0.00019252800000000002,
"loss": 0.0453,
"step": 1870
},
{
"epoch": 0.284396036608426,
"grad_norm": 0.0789792612195015,
"learning_rate": 0.000192488,
"loss": 0.0498,
"step": 1880
},
{
"epoch": 0.2859087814840027,
"grad_norm": 0.05619093030691147,
"learning_rate": 0.000192448,
"loss": 0.0454,
"step": 1890
},
{
"epoch": 0.28742152635957946,
"grad_norm": 0.061943668872117996,
"learning_rate": 0.000192408,
"loss": 0.0496,
"step": 1900
},
{
"epoch": 0.2889342712351562,
"grad_norm": 0.07192958891391754,
"learning_rate": 0.00019236800000000003,
"loss": 0.05,
"step": 1910
},
{
"epoch": 0.2904470161107329,
"grad_norm": 0.07053862512111664,
"learning_rate": 0.00019232800000000002,
"loss": 0.0504,
"step": 1920
},
{
"epoch": 0.29195976098630966,
"grad_norm": 0.06491555273532867,
"learning_rate": 0.000192288,
"loss": 0.0478,
"step": 1930
},
{
"epoch": 0.2934725058618864,
"grad_norm": 0.06389233469963074,
"learning_rate": 0.000192248,
"loss": 0.0469,
"step": 1940
},
{
"epoch": 0.2949852507374631,
"grad_norm": 0.06336333602666855,
"learning_rate": 0.000192208,
"loss": 0.0472,
"step": 1950
},
{
"epoch": 0.29649799561303986,
"grad_norm": 0.06351201981306076,
"learning_rate": 0.00019216800000000002,
"loss": 0.0459,
"step": 1960
},
{
"epoch": 0.2980107404886166,
"grad_norm": 0.0773550271987915,
"learning_rate": 0.00019212800000000001,
"loss": 0.0435,
"step": 1970
},
{
"epoch": 0.2995234853641933,
"grad_norm": 0.07999245822429657,
"learning_rate": 0.000192088,
"loss": 0.051,
"step": 1980
},
{
"epoch": 0.30103623023977005,
"grad_norm": 0.05664638802409172,
"learning_rate": 0.000192048,
"loss": 0.0493,
"step": 1990
},
{
"epoch": 0.3025489751153468,
"grad_norm": 0.050149012356996536,
"learning_rate": 0.00019200800000000002,
"loss": 0.0491,
"step": 2000
},
{
"epoch": 0.3025489751153468,
"eval_cer": 0.10787543886957575,
"eval_loss": 0.042158011347055435,
"eval_runtime": 10458.1763,
"eval_samples_per_second": 2.013,
"eval_steps_per_second": 0.252,
"step": 2000
},
{
"epoch": 0.3040617199909235,
"grad_norm": 0.06383787840604782,
"learning_rate": 0.00019196800000000002,
"loss": 0.0421,
"step": 2010
},
{
"epoch": 0.30557446486650025,
"grad_norm": 0.05740641430020332,
"learning_rate": 0.000191928,
"loss": 0.0499,
"step": 2020
},
{
"epoch": 0.307087209742077,
"grad_norm": 0.07163075357675552,
"learning_rate": 0.000191888,
"loss": 0.0431,
"step": 2030
},
{
"epoch": 0.3085999546176537,
"grad_norm": 0.05976075306534767,
"learning_rate": 0.000191848,
"loss": 0.0476,
"step": 2040
},
{
"epoch": 0.31011269949323045,
"grad_norm": 0.0871894434094429,
"learning_rate": 0.00019180800000000002,
"loss": 0.0449,
"step": 2050
},
{
"epoch": 0.3116254443688072,
"grad_norm": 0.07474277913570404,
"learning_rate": 0.000191768,
"loss": 0.0422,
"step": 2060
},
{
"epoch": 0.3131381892443839,
"grad_norm": 0.05594407767057419,
"learning_rate": 0.00019172800000000003,
"loss": 0.0479,
"step": 2070
},
{
"epoch": 0.31465093411996065,
"grad_norm": 0.06565164029598236,
"learning_rate": 0.000191688,
"loss": 0.0501,
"step": 2080
},
{
"epoch": 0.3161636789955374,
"grad_norm": 0.07224603742361069,
"learning_rate": 0.000191648,
"loss": 0.0474,
"step": 2090
},
{
"epoch": 0.3176764238711141,
"grad_norm": 0.07781083881855011,
"learning_rate": 0.000191608,
"loss": 0.0401,
"step": 2100
},
{
"epoch": 0.31918916874669084,
"grad_norm": 0.08147955685853958,
"learning_rate": 0.000191568,
"loss": 0.0486,
"step": 2110
},
{
"epoch": 0.32070191362226763,
"grad_norm": 0.05572337657213211,
"learning_rate": 0.00019152800000000003,
"loss": 0.0488,
"step": 2120
},
{
"epoch": 0.32221465849784436,
"grad_norm": 0.06601813435554504,
"learning_rate": 0.000191488,
"loss": 0.0466,
"step": 2130
},
{
"epoch": 0.3237274033734211,
"grad_norm": 0.057904861867427826,
"learning_rate": 0.00019144800000000001,
"loss": 0.0479,
"step": 2140
},
{
"epoch": 0.32524014824899783,
"grad_norm": 0.057231709361076355,
"learning_rate": 0.000191408,
"loss": 0.0522,
"step": 2150
},
{
"epoch": 0.32675289312457456,
"grad_norm": 0.08306867629289627,
"learning_rate": 0.000191368,
"loss": 0.0439,
"step": 2160
},
{
"epoch": 0.3282656380001513,
"grad_norm": 0.0742512047290802,
"learning_rate": 0.00019132800000000002,
"loss": 0.0434,
"step": 2170
},
{
"epoch": 0.329778382875728,
"grad_norm": 0.07260335236787796,
"learning_rate": 0.000191288,
"loss": 0.0505,
"step": 2180
},
{
"epoch": 0.33129112775130476,
"grad_norm": 0.07398936152458191,
"learning_rate": 0.000191248,
"loss": 0.0519,
"step": 2190
},
{
"epoch": 0.3328038726268815,
"grad_norm": 0.069728784263134,
"learning_rate": 0.000191208,
"loss": 0.0501,
"step": 2200
},
{
"epoch": 0.3328038726268815,
"eval_cer": 0.07287520414693144,
"eval_loss": 0.041937489062547684,
"eval_runtime": 10449.7877,
"eval_samples_per_second": 2.015,
"eval_steps_per_second": 0.252,
"step": 2200
},
{
"epoch": 0.3343166175024582,
"grad_norm": 0.07778773456811905,
"learning_rate": 0.00019116800000000002,
"loss": 0.0485,
"step": 2210
},
{
"epoch": 0.33582936237803496,
"grad_norm": 0.08489017933607101,
"learning_rate": 0.00019112800000000002,
"loss": 0.047,
"step": 2220
},
{
"epoch": 0.3373421072536117,
"grad_norm": 0.0746629610657692,
"learning_rate": 0.000191088,
"loss": 0.0444,
"step": 2230
},
{
"epoch": 0.3388548521291884,
"grad_norm": 0.07858649641275406,
"learning_rate": 0.000191048,
"loss": 0.0537,
"step": 2240
},
{
"epoch": 0.34036759700476515,
"grad_norm": 0.08357574045658112,
"learning_rate": 0.000191008,
"loss": 0.054,
"step": 2250
},
{
"epoch": 0.3418803418803419,
"grad_norm": 0.05976574867963791,
"learning_rate": 0.00019096800000000002,
"loss": 0.0465,
"step": 2260
},
{
"epoch": 0.3433930867559186,
"grad_norm": 0.07549616694450378,
"learning_rate": 0.000190928,
"loss": 0.0479,
"step": 2270
},
{
"epoch": 0.34490583163149535,
"grad_norm": 0.07128783315420151,
"learning_rate": 0.000190888,
"loss": 0.0481,
"step": 2280
},
{
"epoch": 0.3464185765070721,
"grad_norm": 0.05093182995915413,
"learning_rate": 0.000190848,
"loss": 0.039,
"step": 2290
},
{
"epoch": 0.3479313213826488,
"grad_norm": 0.07213055342435837,
"learning_rate": 0.00019080800000000002,
"loss": 0.0486,
"step": 2300
},
{
"epoch": 0.34944406625822555,
"grad_norm": 0.08296896517276764,
"learning_rate": 0.00019076800000000001,
"loss": 0.0436,
"step": 2310
},
{
"epoch": 0.3509568111338023,
"grad_norm": 0.05904708430171013,
"learning_rate": 0.000190728,
"loss": 0.0457,
"step": 2320
},
{
"epoch": 0.352469556009379,
"grad_norm": 0.07709085941314697,
"learning_rate": 0.000190688,
"loss": 0.0456,
"step": 2330
},
{
"epoch": 0.35398230088495575,
"grad_norm": 0.061139535158872604,
"learning_rate": 0.000190648,
"loss": 0.0484,
"step": 2340
},
{
"epoch": 0.3554950457605325,
"grad_norm": 0.11013538390398026,
"learning_rate": 0.00019060800000000002,
"loss": 0.0463,
"step": 2350
},
{
"epoch": 0.3570077906361092,
"grad_norm": 0.04920123890042305,
"learning_rate": 0.000190568,
"loss": 0.0404,
"step": 2360
},
{
"epoch": 0.35852053551168594,
"grad_norm": 0.05916327238082886,
"learning_rate": 0.00019052800000000003,
"loss": 0.0506,
"step": 2370
},
{
"epoch": 0.3600332803872627,
"grad_norm": 0.08169171214103699,
"learning_rate": 0.000190488,
"loss": 0.0422,
"step": 2380
},
{
"epoch": 0.3615460252628394,
"grad_norm": 0.07195686548948288,
"learning_rate": 0.00019044800000000002,
"loss": 0.0476,
"step": 2390
},
{
"epoch": 0.36305877013841614,
"grad_norm": 0.06132512912154198,
"learning_rate": 0.000190408,
"loss": 0.0451,
"step": 2400
},
{
"epoch": 0.36305877013841614,
"eval_cer": 0.22885396051223894,
"eval_loss": 0.04164993762969971,
"eval_runtime": 10444.7845,
"eval_samples_per_second": 2.016,
"eval_steps_per_second": 0.252,
"step": 2400
},
{
"epoch": 0.3645715150139929,
"grad_norm": 0.06889329850673676,
"learning_rate": 0.000190368,
"loss": 0.0536,
"step": 2410
},
{
"epoch": 0.3660842598895696,
"grad_norm": 0.06513672322034836,
"learning_rate": 0.00019032800000000002,
"loss": 0.0472,
"step": 2420
},
{
"epoch": 0.36759700476514634,
"grad_norm": 0.06588304787874222,
"learning_rate": 0.000190288,
"loss": 0.046,
"step": 2430
},
{
"epoch": 0.3691097496407231,
"grad_norm": 0.07162468135356903,
"learning_rate": 0.000190248,
"loss": 0.0444,
"step": 2440
},
{
"epoch": 0.3706224945162998,
"grad_norm": 0.05831474810838699,
"learning_rate": 0.000190208,
"loss": 0.0448,
"step": 2450
},
{
"epoch": 0.37213523939187654,
"grad_norm": 0.11214031279087067,
"learning_rate": 0.000190168,
"loss": 0.0491,
"step": 2460
},
{
"epoch": 0.37364798426745327,
"grad_norm": 0.07672178000211716,
"learning_rate": 0.00019012800000000002,
"loss": 0.0489,
"step": 2470
},
{
"epoch": 0.37516072914303,
"grad_norm": 0.07850979268550873,
"learning_rate": 0.000190088,
"loss": 0.047,
"step": 2480
},
{
"epoch": 0.37667347401860674,
"grad_norm": 0.0473526194691658,
"learning_rate": 0.000190048,
"loss": 0.0436,
"step": 2490
},
{
"epoch": 0.3781862188941835,
"grad_norm": 0.08313214778900146,
"learning_rate": 0.000190008,
"loss": 0.0457,
"step": 2500
},
{
"epoch": 0.37969896376976026,
"grad_norm": 0.07851678878068924,
"learning_rate": 0.00018996800000000002,
"loss": 0.0399,
"step": 2510
},
{
"epoch": 0.381211708645337,
"grad_norm": 0.06067463755607605,
"learning_rate": 0.00018992800000000002,
"loss": 0.0406,
"step": 2520
},
{
"epoch": 0.3827244535209137,
"grad_norm": 0.07291869819164276,
"learning_rate": 0.000189888,
"loss": 0.0411,
"step": 2530
},
{
"epoch": 0.38423719839649045,
"grad_norm": 0.05576318874955177,
"learning_rate": 0.000189848,
"loss": 0.0412,
"step": 2540
},
{
"epoch": 0.3857499432720672,
"grad_norm": 0.05669853091239929,
"learning_rate": 0.000189808,
"loss": 0.0462,
"step": 2550
},
{
"epoch": 0.3872626881476439,
"grad_norm": 0.0653596743941307,
"learning_rate": 0.00018976800000000002,
"loss": 0.0504,
"step": 2560
},
{
"epoch": 0.38877543302322065,
"grad_norm": 0.07938168197870255,
"learning_rate": 0.000189728,
"loss": 0.0423,
"step": 2570
},
{
"epoch": 0.3902881778987974,
"grad_norm": 0.19600598514080048,
"learning_rate": 0.000189688,
"loss": 0.0422,
"step": 2580
},
{
"epoch": 0.3918009227743741,
"grad_norm": 0.08753781765699387,
"learning_rate": 0.000189648,
"loss": 0.0485,
"step": 2590
},
{
"epoch": 0.39331366764995085,
"grad_norm": 0.07059615105390549,
"learning_rate": 0.00018960800000000002,
"loss": 0.0441,
"step": 2600
},
{
"epoch": 0.39331366764995085,
"eval_cer": 0.12797016798729038,
"eval_loss": 0.040877681225538254,
"eval_runtime": 10426.9488,
"eval_samples_per_second": 2.019,
"eval_steps_per_second": 0.252,
"step": 2600
},
{
"epoch": 0.3948264125255276,
"grad_norm": 0.07426866888999939,
"learning_rate": 0.000189568,
"loss": 0.0456,
"step": 2610
},
{
"epoch": 0.3963391574011043,
"grad_norm": 0.05869770795106888,
"learning_rate": 0.000189528,
"loss": 0.047,
"step": 2620
},
{
"epoch": 0.39785190227668105,
"grad_norm": 0.09353045374155045,
"learning_rate": 0.000189488,
"loss": 0.0457,
"step": 2630
},
{
"epoch": 0.3993646471522578,
"grad_norm": 0.083396315574646,
"learning_rate": 0.000189448,
"loss": 0.0441,
"step": 2640
},
{
"epoch": 0.4008773920278345,
"grad_norm": 0.0698527917265892,
"learning_rate": 0.000189408,
"loss": 0.0469,
"step": 2650
},
{
"epoch": 0.40239013690341124,
"grad_norm": 0.07554033398628235,
"learning_rate": 0.000189368,
"loss": 0.0523,
"step": 2660
},
{
"epoch": 0.403902881778988,
"grad_norm": 0.08026187121868134,
"learning_rate": 0.00018932800000000003,
"loss": 0.0492,
"step": 2670
},
{
"epoch": 0.4054156266545647,
"grad_norm": 0.0758117213845253,
"learning_rate": 0.000189288,
"loss": 0.0471,
"step": 2680
},
{
"epoch": 0.40692837153014144,
"grad_norm": 0.0716470330953598,
"learning_rate": 0.00018924800000000001,
"loss": 0.0401,
"step": 2690
},
{
"epoch": 0.4084411164057182,
"grad_norm": 0.07114976644515991,
"learning_rate": 0.000189208,
"loss": 0.0483,
"step": 2700
},
{
"epoch": 0.4099538612812949,
"grad_norm": 0.059242133051157,
"learning_rate": 0.000189168,
"loss": 0.0416,
"step": 2710
},
{
"epoch": 0.41146660615687164,
"grad_norm": 0.07214327901601791,
"learning_rate": 0.00018912800000000002,
"loss": 0.0446,
"step": 2720
},
{
"epoch": 0.41297935103244837,
"grad_norm": 0.0404672808945179,
"learning_rate": 0.000189088,
"loss": 0.0445,
"step": 2730
},
{
"epoch": 0.4144920959080251,
"grad_norm": 0.06663410365581512,
"learning_rate": 0.000189048,
"loss": 0.0435,
"step": 2740
},
{
"epoch": 0.41600484078360184,
"grad_norm": 0.0690486952662468,
"learning_rate": 0.000189008,
"loss": 0.048,
"step": 2750
},
{
"epoch": 0.41751758565917857,
"grad_norm": 0.07034830003976822,
"learning_rate": 0.00018896800000000002,
"loss": 0.0423,
"step": 2760
},
{
"epoch": 0.4190303305347553,
"grad_norm": 0.08420894294977188,
"learning_rate": 0.00018892800000000002,
"loss": 0.0525,
"step": 2770
},
{
"epoch": 0.42054307541033203,
"grad_norm": 0.07617480307817459,
"learning_rate": 0.000188888,
"loss": 0.0492,
"step": 2780
},
{
"epoch": 0.42205582028590877,
"grad_norm": 0.06841789186000824,
"learning_rate": 0.000188848,
"loss": 0.0427,
"step": 2790
},
{
"epoch": 0.4235685651614855,
"grad_norm": 0.07013357430696487,
"learning_rate": 0.000188808,
"loss": 0.04,
"step": 2800
},
{
"epoch": 0.4235685651614855,
"eval_cer": 0.26005539454405746,
"eval_loss": 0.04089580848813057,
"eval_runtime": 10530.3682,
"eval_samples_per_second": 1.999,
"eval_steps_per_second": 0.25,
"step": 2800
},
{
"epoch": 0.42508131003706223,
"grad_norm": 0.06432001292705536,
"learning_rate": 0.00018876800000000002,
"loss": 0.0402,
"step": 2810
},
{
"epoch": 0.42659405491263896,
"grad_norm": 0.06437406688928604,
"learning_rate": 0.000188728,
"loss": 0.0397,
"step": 2820
},
{
"epoch": 0.4281067997882157,
"grad_norm": 0.0579422190785408,
"learning_rate": 0.000188688,
"loss": 0.0431,
"step": 2830
},
{
"epoch": 0.42961954466379243,
"grad_norm": 0.0628400593996048,
"learning_rate": 0.000188648,
"loss": 0.0426,
"step": 2840
},
{
"epoch": 0.43113228953936916,
"grad_norm": 0.04976367950439453,
"learning_rate": 0.000188608,
"loss": 0.0448,
"step": 2850
},
{
"epoch": 0.4326450344149459,
"grad_norm": 0.07479149103164673,
"learning_rate": 0.00018856800000000001,
"loss": 0.0458,
"step": 2860
},
{
"epoch": 0.4341577792905226,
"grad_norm": 0.06853318214416504,
"learning_rate": 0.000188528,
"loss": 0.045,
"step": 2870
},
{
"epoch": 0.43567052416609936,
"grad_norm": 0.08534535765647888,
"learning_rate": 0.00018848800000000003,
"loss": 0.044,
"step": 2880
},
{
"epoch": 0.43718326904167615,
"grad_norm": 0.05148012563586235,
"learning_rate": 0.000188448,
"loss": 0.0448,
"step": 2890
},
{
"epoch": 0.4386960139172529,
"grad_norm": 0.073714479804039,
"learning_rate": 0.00018840800000000002,
"loss": 0.0388,
"step": 2900
},
{
"epoch": 0.4402087587928296,
"grad_norm": 0.06875050067901611,
"learning_rate": 0.000188368,
"loss": 0.0476,
"step": 2910
},
{
"epoch": 0.44172150366840635,
"grad_norm": 0.07048488408327103,
"learning_rate": 0.000188328,
"loss": 0.0537,
"step": 2920
},
{
"epoch": 0.4432342485439831,
"grad_norm": 0.06159156188368797,
"learning_rate": 0.00018828800000000002,
"loss": 0.0523,
"step": 2930
},
{
"epoch": 0.4447469934195598,
"grad_norm": 0.0851297378540039,
"learning_rate": 0.000188248,
"loss": 0.0466,
"step": 2940
},
{
"epoch": 0.44625973829513654,
"grad_norm": 0.07920840382575989,
"learning_rate": 0.000188208,
"loss": 0.0434,
"step": 2950
},
{
"epoch": 0.4477724831707133,
"grad_norm": 0.06767392158508301,
"learning_rate": 0.000188168,
"loss": 0.0446,
"step": 2960
},
{
"epoch": 0.44928522804629,
"grad_norm": 0.0621979758143425,
"learning_rate": 0.00018812800000000003,
"loss": 0.0514,
"step": 2970
},
{
"epoch": 0.45079797292186674,
"grad_norm": 0.06485885381698608,
"learning_rate": 0.00018808800000000002,
"loss": 0.0403,
"step": 2980
},
{
"epoch": 0.4523107177974435,
"grad_norm": 0.07618974149227142,
"learning_rate": 0.000188048,
"loss": 0.046,
"step": 2990
},
{
"epoch": 0.4538234626730202,
"grad_norm": 0.050627488642930984,
"learning_rate": 0.000188008,
"loss": 0.04,
"step": 3000
},
{
"epoch": 0.4538234626730202,
"eval_cer": 0.027385337988253985,
"eval_loss": 0.0410909466445446,
"eval_runtime": 11737.0194,
"eval_samples_per_second": 1.794,
"eval_steps_per_second": 0.224,
"step": 3000
},
{
"epoch": 0.45533620754859694,
"grad_norm": 0.07569224387407303,
"learning_rate": 0.000187968,
"loss": 0.0453,
"step": 3010
},
{
"epoch": 0.45684895242417367,
"grad_norm": 0.06267885118722916,
"learning_rate": 0.00018792800000000002,
"loss": 0.0519,
"step": 3020
},
{
"epoch": 0.4583616972997504,
"grad_norm": 0.0801217257976532,
"learning_rate": 0.00018788800000000001,
"loss": 0.0452,
"step": 3030
},
{
"epoch": 0.45987444217532714,
"grad_norm": 0.06966337561607361,
"learning_rate": 0.000187848,
"loss": 0.0459,
"step": 3040
},
{
"epoch": 0.46138718705090387,
"grad_norm": 0.05708028003573418,
"learning_rate": 0.000187808,
"loss": 0.0462,
"step": 3050
},
{
"epoch": 0.4628999319264806,
"grad_norm": 0.06033516675233841,
"learning_rate": 0.00018776800000000002,
"loss": 0.0459,
"step": 3060
},
{
"epoch": 0.46441267680205733,
"grad_norm": 0.06908197700977325,
"learning_rate": 0.00018772800000000002,
"loss": 0.048,
"step": 3070
},
{
"epoch": 0.46592542167763407,
"grad_norm": 0.0723978653550148,
"learning_rate": 0.000187688,
"loss": 0.047,
"step": 3080
},
{
"epoch": 0.4674381665532108,
"grad_norm": 0.06268727034330368,
"learning_rate": 0.000187648,
"loss": 0.0387,
"step": 3090
},
{
"epoch": 0.46895091142878753,
"grad_norm": 0.06796183437108994,
"learning_rate": 0.000187608,
"loss": 0.0379,
"step": 3100
},
{
"epoch": 0.47046365630436426,
"grad_norm": 0.08227751404047012,
"learning_rate": 0.00018756800000000002,
"loss": 0.0497,
"step": 3110
},
{
"epoch": 0.471976401179941,
"grad_norm": 0.06391087174415588,
"learning_rate": 0.000187528,
"loss": 0.045,
"step": 3120
},
{
"epoch": 0.47348914605551773,
"grad_norm": 0.09645809978246689,
"learning_rate": 0.00018748800000000003,
"loss": 0.0479,
"step": 3130
},
{
"epoch": 0.47500189093109446,
"grad_norm": 0.07187838107347488,
"learning_rate": 0.000187448,
"loss": 0.0438,
"step": 3140
},
{
"epoch": 0.4765146358066712,
"grad_norm": 0.06578271836042404,
"learning_rate": 0.00018740800000000002,
"loss": 0.0471,
"step": 3150
},
{
"epoch": 0.4780273806822479,
"grad_norm": 0.06598031520843506,
"learning_rate": 0.000187368,
"loss": 0.0463,
"step": 3160
},
{
"epoch": 0.47954012555782466,
"grad_norm": 0.06380560249090195,
"learning_rate": 0.000187328,
"loss": 0.0439,
"step": 3170
},
{
"epoch": 0.4810528704334014,
"grad_norm": 0.05300907790660858,
"learning_rate": 0.00018728800000000003,
"loss": 0.0385,
"step": 3180
},
{
"epoch": 0.4825656153089781,
"grad_norm": 0.08515879511833191,
"learning_rate": 0.000187248,
"loss": 0.0444,
"step": 3190
},
{
"epoch": 0.48407836018455486,
"grad_norm": 0.0779171735048294,
"learning_rate": 0.00018720800000000001,
"loss": 0.0453,
"step": 3200
},
{
"epoch": 0.48407836018455486,
"eval_cer": 0.010036246117811001,
"eval_loss": 0.04116720333695412,
"eval_runtime": 10575.268,
"eval_samples_per_second": 1.991,
"eval_steps_per_second": 0.249,
"step": 3200
},
{
"epoch": 0.4855911050601316,
"grad_norm": 0.07719563692808151,
"learning_rate": 0.000187168,
"loss": 0.0516,
"step": 3210
},
{
"epoch": 0.4871038499357083,
"grad_norm": 0.0623527429997921,
"learning_rate": 0.000187128,
"loss": 0.0412,
"step": 3220
},
{
"epoch": 0.48861659481128505,
"grad_norm": 0.05286158621311188,
"learning_rate": 0.00018708800000000002,
"loss": 0.0433,
"step": 3230
},
{
"epoch": 0.4901293396868618,
"grad_norm": 0.05317120626568794,
"learning_rate": 0.000187048,
"loss": 0.0451,
"step": 3240
},
{
"epoch": 0.4916420845624385,
"grad_norm": 0.06447257846593857,
"learning_rate": 0.000187008,
"loss": 0.0552,
"step": 3250
},
{
"epoch": 0.49315482943801525,
"grad_norm": 0.05432993173599243,
"learning_rate": 0.000186968,
"loss": 0.0454,
"step": 3260
},
{
"epoch": 0.49466757431359204,
"grad_norm": 0.07853369414806366,
"learning_rate": 0.00018692800000000002,
"loss": 0.0513,
"step": 3270
},
{
"epoch": 0.4961803191891688,
"grad_norm": 0.07532196491956711,
"learning_rate": 0.00018688800000000002,
"loss": 0.0494,
"step": 3280
},
{
"epoch": 0.4976930640647455,
"grad_norm": 0.0591423436999321,
"learning_rate": 0.000186848,
"loss": 0.0406,
"step": 3290
},
{
"epoch": 0.49920580894032224,
"grad_norm": 0.05588558688759804,
"learning_rate": 0.000186808,
"loss": 0.0454,
"step": 3300
},
{
"epoch": 0.5007185538158989,
"grad_norm": 0.06208329647779465,
"learning_rate": 0.000186768,
"loss": 0.0379,
"step": 3310
},
{
"epoch": 0.5022312986914756,
"grad_norm": 0.09954684972763062,
"learning_rate": 0.00018672800000000002,
"loss": 0.0441,
"step": 3320
},
{
"epoch": 0.5037440435670524,
"grad_norm": 0.06522241979837418,
"learning_rate": 0.000186688,
"loss": 0.0435,
"step": 3330
},
{
"epoch": 0.5052567884426291,
"grad_norm": 0.06771814823150635,
"learning_rate": 0.000186648,
"loss": 0.0407,
"step": 3340
},
{
"epoch": 0.5067695333182058,
"grad_norm": 0.09186646342277527,
"learning_rate": 0.000186608,
"loss": 0.0468,
"step": 3350
},
{
"epoch": 0.5082822781937826,
"grad_norm": 0.05741488188505173,
"learning_rate": 0.00018656800000000002,
"loss": 0.0427,
"step": 3360
},
{
"epoch": 0.5097950230693593,
"grad_norm": 0.078957200050354,
"learning_rate": 0.00018652800000000001,
"loss": 0.0524,
"step": 3370
},
{
"epoch": 0.511307767944936,
"grad_norm": 0.06480754166841507,
"learning_rate": 0.000186488,
"loss": 0.0491,
"step": 3380
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.07016266882419586,
"learning_rate": 0.000186448,
"loss": 0.0455,
"step": 3390
},
{
"epoch": 0.5143332576960895,
"grad_norm": 0.09549427777528763,
"learning_rate": 0.000186408,
"loss": 0.0435,
"step": 3400
},
{
"epoch": 0.5143332576960895,
"eval_cer": 0.06014582453123417,
"eval_loss": 0.040756821632385254,
"eval_runtime": 10458.365,
"eval_samples_per_second": 2.013,
"eval_steps_per_second": 0.252,
"step": 3400
},
{
"epoch": 0.5158460025716662,
"grad_norm": 0.06771855056285858,
"learning_rate": 0.00018636800000000002,
"loss": 0.0496,
"step": 3410
},
{
"epoch": 0.517358747447243,
"grad_norm": 0.051270436495542526,
"learning_rate": 0.000186328,
"loss": 0.0376,
"step": 3420
},
{
"epoch": 0.5188714923228197,
"grad_norm": 0.05424557998776436,
"learning_rate": 0.00018628800000000003,
"loss": 0.0455,
"step": 3430
},
{
"epoch": 0.5203842371983964,
"grad_norm": 0.07000952959060669,
"learning_rate": 0.000186248,
"loss": 0.0494,
"step": 3440
},
{
"epoch": 0.5218969820739732,
"grad_norm": 0.06696450710296631,
"learning_rate": 0.00018620800000000002,
"loss": 0.0449,
"step": 3450
},
{
"epoch": 0.52340972694955,
"grad_norm": 0.07243742048740387,
"learning_rate": 0.000186168,
"loss": 0.0481,
"step": 3460
},
{
"epoch": 0.5249224718251267,
"grad_norm": 0.07457748800516129,
"learning_rate": 0.000186128,
"loss": 0.0413,
"step": 3470
},
{
"epoch": 0.5264352167007035,
"grad_norm": 0.05373325198888779,
"learning_rate": 0.00018608800000000002,
"loss": 0.046,
"step": 3480
},
{
"epoch": 0.5279479615762802,
"grad_norm": 0.07769589871168137,
"learning_rate": 0.000186048,
"loss": 0.0443,
"step": 3490
},
{
"epoch": 0.5294607064518569,
"grad_norm": 0.05949350818991661,
"learning_rate": 0.000186008,
"loss": 0.0426,
"step": 3500
},
{
"epoch": 0.5309734513274337,
"grad_norm": 0.08557622879743576,
"learning_rate": 0.000185968,
"loss": 0.0436,
"step": 3510
},
{
"epoch": 0.5324861962030104,
"grad_norm": 0.07504332065582275,
"learning_rate": 0.00018592800000000003,
"loss": 0.045,
"step": 3520
},
{
"epoch": 0.5339989410785871,
"grad_norm": 0.08510497957468033,
"learning_rate": 0.00018588800000000002,
"loss": 0.0451,
"step": 3530
},
{
"epoch": 0.5355116859541639,
"grad_norm": 0.06645802408456802,
"learning_rate": 0.000185848,
"loss": 0.0459,
"step": 3540
},
{
"epoch": 0.5370244308297406,
"grad_norm": 0.05905970185995102,
"learning_rate": 0.000185808,
"loss": 0.0431,
"step": 3550
},
{
"epoch": 0.5385371757053173,
"grad_norm": 0.059341125190258026,
"learning_rate": 0.000185768,
"loss": 0.0521,
"step": 3560
},
{
"epoch": 0.5400499205808941,
"grad_norm": 0.07676515728235245,
"learning_rate": 0.00018572800000000002,
"loss": 0.0446,
"step": 3570
},
{
"epoch": 0.5415626654564708,
"grad_norm": 0.05860384181141853,
"learning_rate": 0.00018568800000000002,
"loss": 0.041,
"step": 3580
},
{
"epoch": 0.5430754103320475,
"grad_norm": 0.07133147865533829,
"learning_rate": 0.000185648,
"loss": 0.0479,
"step": 3590
},
{
"epoch": 0.5445881552076243,
"grad_norm": 0.058478474617004395,
"learning_rate": 0.000185608,
"loss": 0.0447,
"step": 3600
},
{
"epoch": 0.5445881552076243,
"eval_cer": 0.16368877753976077,
"eval_loss": 0.04047335311770439,
"eval_runtime": 10446.0422,
"eval_samples_per_second": 2.015,
"eval_steps_per_second": 0.252,
"step": 3600
},
{
"epoch": 0.546100900083201,
"grad_norm": 0.06725309789180756,
"learning_rate": 0.000185568,
"loss": 0.053,
"step": 3610
},
{
"epoch": 0.5476136449587777,
"grad_norm": 0.06334862858057022,
"learning_rate": 0.00018552800000000002,
"loss": 0.0451,
"step": 3620
},
{
"epoch": 0.5491263898343545,
"grad_norm": 0.12283937633037567,
"learning_rate": 0.000185488,
"loss": 0.0437,
"step": 3630
},
{
"epoch": 0.5506391347099312,
"grad_norm": 0.05931037664413452,
"learning_rate": 0.000185448,
"loss": 0.0431,
"step": 3640
},
{
"epoch": 0.5521518795855079,
"grad_norm": 0.05501909554004669,
"learning_rate": 0.000185408,
"loss": 0.0398,
"step": 3650
},
{
"epoch": 0.5536646244610847,
"grad_norm": 0.06066635251045227,
"learning_rate": 0.00018536800000000002,
"loss": 0.0497,
"step": 3660
},
{
"epoch": 0.5551773693366614,
"grad_norm": 0.1352480947971344,
"learning_rate": 0.000185328,
"loss": 0.0445,
"step": 3670
},
{
"epoch": 0.5566901142122381,
"grad_norm": 0.08712221682071686,
"learning_rate": 0.000185288,
"loss": 0.0485,
"step": 3680
},
{
"epoch": 0.5582028590878149,
"grad_norm": 0.06511665135622025,
"learning_rate": 0.000185248,
"loss": 0.0464,
"step": 3690
},
{
"epoch": 0.5597156039633916,
"grad_norm": 0.052760981023311615,
"learning_rate": 0.000185208,
"loss": 0.0417,
"step": 3700
},
{
"epoch": 0.5612283488389683,
"grad_norm": 0.05113260820508003,
"learning_rate": 0.000185168,
"loss": 0.0426,
"step": 3710
},
{
"epoch": 0.5627410937145451,
"grad_norm": 0.06565012037754059,
"learning_rate": 0.000185128,
"loss": 0.0397,
"step": 3720
},
{
"epoch": 0.5642538385901218,
"grad_norm": 0.0608823299407959,
"learning_rate": 0.00018508800000000003,
"loss": 0.0411,
"step": 3730
},
{
"epoch": 0.5657665834656985,
"grad_norm": 0.0670706033706665,
"learning_rate": 0.000185048,
"loss": 0.0495,
"step": 3740
},
{
"epoch": 0.5672793283412753,
"grad_norm": 0.07000606507062912,
"learning_rate": 0.00018500800000000001,
"loss": 0.0457,
"step": 3750
},
{
"epoch": 0.568792073216852,
"grad_norm": 0.08072007447481155,
"learning_rate": 0.000184968,
"loss": 0.0484,
"step": 3760
},
{
"epoch": 0.5703048180924287,
"grad_norm": 0.06795356422662735,
"learning_rate": 0.000184928,
"loss": 0.0495,
"step": 3770
},
{
"epoch": 0.5718175629680055,
"grad_norm": 0.3031274974346161,
"learning_rate": 0.00018488800000000002,
"loss": 0.0504,
"step": 3780
},
{
"epoch": 0.5733303078435822,
"grad_norm": 0.05166814848780632,
"learning_rate": 0.000184848,
"loss": 0.0442,
"step": 3790
},
{
"epoch": 0.5748430527191589,
"grad_norm": 0.08816450089216232,
"learning_rate": 0.000184808,
"loss": 0.0525,
"step": 3800
},
{
"epoch": 0.5748430527191589,
"eval_cer": 0.09852050611143642,
"eval_loss": 0.041136305779218674,
"eval_runtime": 10432.1011,
"eval_samples_per_second": 2.018,
"eval_steps_per_second": 0.252,
"step": 3800
},
{
"epoch": 0.5763557975947357,
"grad_norm": 0.06531400233507156,
"learning_rate": 0.000184768,
"loss": 0.0459,
"step": 3810
},
{
"epoch": 0.5778685424703124,
"grad_norm": 0.07049426436424255,
"learning_rate": 0.00018472800000000002,
"loss": 0.0386,
"step": 3820
},
{
"epoch": 0.5793812873458891,
"grad_norm": 0.07954803854227066,
"learning_rate": 0.00018468800000000002,
"loss": 0.0451,
"step": 3830
},
{
"epoch": 0.5808940322214659,
"grad_norm": 0.07543455064296722,
"learning_rate": 0.000184648,
"loss": 0.0406,
"step": 3840
},
{
"epoch": 0.5824067770970426,
"grad_norm": 0.08292882144451141,
"learning_rate": 0.000184608,
"loss": 0.0544,
"step": 3850
},
{
"epoch": 0.5839195219726193,
"grad_norm": 0.05814971402287483,
"learning_rate": 0.000184568,
"loss": 0.0441,
"step": 3860
},
{
"epoch": 0.585432266848196,
"grad_norm": 0.06112606078386307,
"learning_rate": 0.00018452800000000002,
"loss": 0.0482,
"step": 3870
},
{
"epoch": 0.5869450117237728,
"grad_norm": 0.08487452566623688,
"learning_rate": 0.000184488,
"loss": 0.0446,
"step": 3880
},
{
"epoch": 0.5884577565993495,
"grad_norm": 0.05025780200958252,
"learning_rate": 0.000184448,
"loss": 0.0453,
"step": 3890
},
{
"epoch": 0.5899705014749262,
"grad_norm": 0.10276935994625092,
"learning_rate": 0.000184408,
"loss": 0.0427,
"step": 3900
},
{
"epoch": 0.591483246350503,
"grad_norm": 0.11926810443401337,
"learning_rate": 0.000184368,
"loss": 0.0472,
"step": 3910
},
{
"epoch": 0.5929959912260797,
"grad_norm": 0.08615875244140625,
"learning_rate": 0.00018432800000000001,
"loss": 0.0504,
"step": 3920
},
{
"epoch": 0.5945087361016564,
"grad_norm": 0.05418393015861511,
"learning_rate": 0.000184288,
"loss": 0.0397,
"step": 3930
},
{
"epoch": 0.5960214809772332,
"grad_norm": 0.06980731338262558,
"learning_rate": 0.000184248,
"loss": 0.0407,
"step": 3940
},
{
"epoch": 0.5975342258528099,
"grad_norm": 0.07121722400188446,
"learning_rate": 0.000184208,
"loss": 0.0441,
"step": 3950
},
{
"epoch": 0.5990469707283866,
"grad_norm": 0.05750627815723419,
"learning_rate": 0.00018416800000000002,
"loss": 0.049,
"step": 3960
},
{
"epoch": 0.6005597156039634,
"grad_norm": 0.08207126706838608,
"learning_rate": 0.000184128,
"loss": 0.0475,
"step": 3970
},
{
"epoch": 0.6020724604795401,
"grad_norm": 0.07319646328687668,
"learning_rate": 0.000184088,
"loss": 0.0517,
"step": 3980
},
{
"epoch": 0.6035852053551168,
"grad_norm": 0.06762152910232544,
"learning_rate": 0.000184048,
"loss": 0.042,
"step": 3990
},
{
"epoch": 0.6050979502306936,
"grad_norm": 0.05603775382041931,
"learning_rate": 0.000184008,
"loss": 0.0434,
"step": 4000
},
{
"epoch": 0.6050979502306936,
"eval_cer": 0.2283245991802003,
"eval_loss": 0.03986261412501335,
"eval_runtime": 10464.7689,
"eval_samples_per_second": 2.012,
"eval_steps_per_second": 0.252,
"step": 4000
},
{
"epoch": 0.6066106951062703,
"grad_norm": 0.05094938725233078,
"learning_rate": 0.000183968,
"loss": 0.0493,
"step": 4010
},
{
"epoch": 0.608123439981847,
"grad_norm": 0.08996951580047607,
"learning_rate": 0.000183928,
"loss": 0.0475,
"step": 4020
},
{
"epoch": 0.6096361848574238,
"grad_norm": 0.07369961589574814,
"learning_rate": 0.00018388800000000003,
"loss": 0.0441,
"step": 4030
},
{
"epoch": 0.6111489297330005,
"grad_norm": 0.06135983020067215,
"learning_rate": 0.000183848,
"loss": 0.0421,
"step": 4040
},
{
"epoch": 0.6126616746085772,
"grad_norm": 0.04601254314184189,
"learning_rate": 0.000183808,
"loss": 0.037,
"step": 4050
},
{
"epoch": 0.614174419484154,
"grad_norm": 0.04949349910020828,
"learning_rate": 0.000183768,
"loss": 0.0424,
"step": 4060
},
{
"epoch": 0.6156871643597307,
"grad_norm": 0.08714490383863449,
"learning_rate": 0.000183728,
"loss": 0.0459,
"step": 4070
},
{
"epoch": 0.6171999092353074,
"grad_norm": 0.07733121514320374,
"learning_rate": 0.00018368800000000002,
"loss": 0.0423,
"step": 4080
},
{
"epoch": 0.6187126541108842,
"grad_norm": 0.070652537047863,
"learning_rate": 0.000183648,
"loss": 0.0417,
"step": 4090
},
{
"epoch": 0.6202253989864609,
"grad_norm": 0.08538975566625595,
"learning_rate": 0.000183608,
"loss": 0.045,
"step": 4100
},
{
"epoch": 0.6217381438620376,
"grad_norm": 0.07866961508989334,
"learning_rate": 0.000183568,
"loss": 0.0435,
"step": 4110
},
{
"epoch": 0.6232508887376144,
"grad_norm": 0.052214980125427246,
"learning_rate": 0.00018352800000000002,
"loss": 0.0389,
"step": 4120
},
{
"epoch": 0.6247636336131911,
"grad_norm": 0.07548975199460983,
"learning_rate": 0.00018348800000000002,
"loss": 0.0406,
"step": 4130
},
{
"epoch": 0.6262763784887678,
"grad_norm": 0.06064745783805847,
"learning_rate": 0.000183448,
"loss": 0.0405,
"step": 4140
},
{
"epoch": 0.6277891233643446,
"grad_norm": 0.06255548447370529,
"learning_rate": 0.000183408,
"loss": 0.0426,
"step": 4150
},
{
"epoch": 0.6293018682399213,
"grad_norm": 0.05550558492541313,
"learning_rate": 0.000183368,
"loss": 0.0432,
"step": 4160
},
{
"epoch": 0.630814613115498,
"grad_norm": 0.06224781274795532,
"learning_rate": 0.00018332800000000002,
"loss": 0.0489,
"step": 4170
},
{
"epoch": 0.6323273579910748,
"grad_norm": 0.04567689448595047,
"learning_rate": 0.000183288,
"loss": 0.0392,
"step": 4180
},
{
"epoch": 0.6338401028666515,
"grad_norm": 0.08686509728431702,
"learning_rate": 0.00018324800000000003,
"loss": 0.0503,
"step": 4190
},
{
"epoch": 0.6353528477422282,
"grad_norm": 0.039897847920656204,
"learning_rate": 0.000183208,
"loss": 0.0437,
"step": 4200
},
{
"epoch": 0.6353528477422282,
"eval_cer": 0.0028697931722888917,
"eval_loss": 0.03980256989598274,
"eval_runtime": 10439.5254,
"eval_samples_per_second": 2.017,
"eval_steps_per_second": 0.252,
"step": 4200
},
{
"epoch": 0.636865592617805,
"grad_norm": 0.07222657650709152,
"learning_rate": 0.00018316800000000002,
"loss": 0.0445,
"step": 4210
},
{
"epoch": 0.6383783374933817,
"grad_norm": 0.06796406954526901,
"learning_rate": 0.000183128,
"loss": 0.0452,
"step": 4220
},
{
"epoch": 0.6398910823689585,
"grad_norm": 0.07380914688110352,
"learning_rate": 0.000183088,
"loss": 0.0456,
"step": 4230
},
{
"epoch": 0.6414038272445353,
"grad_norm": 0.05780802294611931,
"learning_rate": 0.00018304800000000003,
"loss": 0.043,
"step": 4240
},
{
"epoch": 0.642916572120112,
"grad_norm": 0.07155787944793701,
"learning_rate": 0.000183008,
"loss": 0.0422,
"step": 4250
},
{
"epoch": 0.6444293169956887,
"grad_norm": 0.06419336050748825,
"learning_rate": 0.00018296800000000001,
"loss": 0.0453,
"step": 4260
},
{
"epoch": 0.6459420618712655,
"grad_norm": 0.06702402234077454,
"learning_rate": 0.000182928,
"loss": 0.0416,
"step": 4270
},
{
"epoch": 0.6474548067468422,
"grad_norm": 0.062247395515441895,
"learning_rate": 0.00018288800000000003,
"loss": 0.0431,
"step": 4280
},
{
"epoch": 0.6489675516224189,
"grad_norm": 0.05556045100092888,
"learning_rate": 0.00018284800000000002,
"loss": 0.0542,
"step": 4290
},
{
"epoch": 0.6504802964979957,
"grad_norm": 0.07586701959371567,
"learning_rate": 0.000182808,
"loss": 0.0476,
"step": 4300
},
{
"epoch": 0.6519930413735724,
"grad_norm": 0.056563302874565125,
"learning_rate": 0.000182768,
"loss": 0.0441,
"step": 4310
},
{
"epoch": 0.6535057862491491,
"grad_norm": 0.08210831135511398,
"learning_rate": 0.000182728,
"loss": 0.0428,
"step": 4320
},
{
"epoch": 0.6550185311247259,
"grad_norm": 0.06154036149382591,
"learning_rate": 0.00018268800000000002,
"loss": 0.0437,
"step": 4330
},
{
"epoch": 0.6565312760003026,
"grad_norm": 0.06387040764093399,
"learning_rate": 0.00018264800000000002,
"loss": 0.0503,
"step": 4340
},
{
"epoch": 0.6580440208758793,
"grad_norm": 0.07460694015026093,
"learning_rate": 0.000182608,
"loss": 0.0388,
"step": 4350
},
{
"epoch": 0.659556765751456,
"grad_norm": 0.05871427804231644,
"learning_rate": 0.000182568,
"loss": 0.0409,
"step": 4360
},
{
"epoch": 0.6610695106270328,
"grad_norm": 0.05525946244597435,
"learning_rate": 0.000182528,
"loss": 0.0403,
"step": 4370
},
{
"epoch": 0.6625822555026095,
"grad_norm": 0.07400190085172653,
"learning_rate": 0.00018248800000000002,
"loss": 0.0544,
"step": 4380
},
{
"epoch": 0.6640950003781863,
"grad_norm": 0.05236358568072319,
"learning_rate": 0.000182448,
"loss": 0.0424,
"step": 4390
},
{
"epoch": 0.665607745253763,
"grad_norm": 0.07223962247371674,
"learning_rate": 0.000182408,
"loss": 0.0427,
"step": 4400
},
{
"epoch": 0.665607745253763,
"eval_cer": 0.22895526186399429,
"eval_loss": 0.039881668984889984,
"eval_runtime": 10486.5948,
"eval_samples_per_second": 2.008,
"eval_steps_per_second": 0.251,
"step": 4400
},
{
"epoch": 0.6671204901293397,
"grad_norm": 0.04777299240231514,
"learning_rate": 0.000182368,
"loss": 0.0365,
"step": 4410
},
{
"epoch": 0.6686332350049164,
"grad_norm": 0.06789238750934601,
"learning_rate": 0.00018232800000000002,
"loss": 0.041,
"step": 4420
},
{
"epoch": 0.6701459798804932,
"grad_norm": 0.07556366920471191,
"learning_rate": 0.00018228800000000001,
"loss": 0.0454,
"step": 4430
},
{
"epoch": 0.6716587247560699,
"grad_norm": 0.05699057877063751,
"learning_rate": 0.000182248,
"loss": 0.0412,
"step": 4440
},
{
"epoch": 0.6731714696316466,
"grad_norm": 0.06115678697824478,
"learning_rate": 0.000182208,
"loss": 0.0494,
"step": 4450
},
{
"epoch": 0.6746842145072234,
"grad_norm": 0.16907750070095062,
"learning_rate": 0.000182168,
"loss": 0.0457,
"step": 4460
},
{
"epoch": 0.6761969593828001,
"grad_norm": 0.23710806667804718,
"learning_rate": 0.00018212800000000002,
"loss": 0.0491,
"step": 4470
},
{
"epoch": 0.6777097042583768,
"grad_norm": 0.13006287813186646,
"learning_rate": 0.000182088,
"loss": 0.0528,
"step": 4480
},
{
"epoch": 0.6792224491339536,
"grad_norm": 0.24661995470523834,
"learning_rate": 0.00018204800000000003,
"loss": 0.043,
"step": 4490
},
{
"epoch": 0.6807351940095303,
"grad_norm": 0.2757125198841095,
"learning_rate": 0.000182008,
"loss": 0.0477,
"step": 4500
},
{
"epoch": 0.682247938885107,
"grad_norm": 0.27585530281066895,
"learning_rate": 0.00018196800000000002,
"loss": 0.0486,
"step": 4510
},
{
"epoch": 0.6837606837606838,
"grad_norm": 0.10548703372478485,
"learning_rate": 0.000181928,
"loss": 0.0448,
"step": 4520
},
{
"epoch": 0.6852734286362605,
"grad_norm": 0.1989259272813797,
"learning_rate": 0.000181888,
"loss": 0.0508,
"step": 4530
},
{
"epoch": 0.6867861735118372,
"grad_norm": 0.10586623847484589,
"learning_rate": 0.00018184800000000002,
"loss": 0.0486,
"step": 4540
},
{
"epoch": 0.688298918387414,
"grad_norm": 0.09687965363264084,
"learning_rate": 0.000181808,
"loss": 0.0463,
"step": 4550
},
{
"epoch": 0.6898116632629907,
"grad_norm": 0.13362692296504974,
"learning_rate": 0.000181768,
"loss": 0.0441,
"step": 4560
},
{
"epoch": 0.6913244081385674,
"grad_norm": 0.07124081254005432,
"learning_rate": 0.000181728,
"loss": 0.0479,
"step": 4570
},
{
"epoch": 0.6928371530141442,
"grad_norm": 0.060886889696121216,
"learning_rate": 0.00018168800000000003,
"loss": 0.0425,
"step": 4580
},
{
"epoch": 0.6943498978897209,
"grad_norm": 0.09697773307561874,
"learning_rate": 0.00018164800000000002,
"loss": 0.0466,
"step": 4590
},
{
"epoch": 0.6958626427652976,
"grad_norm": 0.09655246883630753,
"learning_rate": 0.00018160800000000001,
"loss": 0.0423,
"step": 4600
},
{
"epoch": 0.6958626427652976,
"eval_cer": 0.3264485475609846,
"eval_loss": 0.04431215673685074,
"eval_runtime": 9966.6677,
"eval_samples_per_second": 2.112,
"eval_steps_per_second": 0.264,
"step": 4600
},
{
"epoch": 0.6973753876408744,
"grad_norm": 0.6920335292816162,
"learning_rate": 0.000181568,
"loss": 0.0612,
"step": 4610
},
{
"epoch": 0.6988881325164511,
"grad_norm": 21.773630142211914,
"learning_rate": 0.00018153600000000002,
"loss": 0.3452,
"step": 4620
},
{
"epoch": 0.7004008773920278,
"grad_norm": 0.6047945022583008,
"learning_rate": 0.0001815,
"loss": 0.8043,
"step": 4630
},
{
"epoch": 0.7019136222676046,
"grad_norm": 0.30588680505752563,
"learning_rate": 0.00018146000000000001,
"loss": 0.094,
"step": 4640
},
{
"epoch": 0.7034263671431813,
"grad_norm": 2.5436811447143555,
"learning_rate": 0.00018142,
"loss": 0.1421,
"step": 4650
},
{
"epoch": 0.704939112018758,
"grad_norm": 3.3921713829040527,
"learning_rate": 0.00018138000000000003,
"loss": 0.2285,
"step": 4660
},
{
"epoch": 0.7064518568943348,
"grad_norm": 6.751514434814453,
"learning_rate": 0.00018134,
"loss": 0.1609,
"step": 4670
},
{
"epoch": 0.7079646017699115,
"grad_norm": 0.2919982075691223,
"learning_rate": 0.00018130000000000002,
"loss": 0.0731,
"step": 4680
},
{
"epoch": 0.7094773466454882,
"grad_norm": 0.2757503092288971,
"learning_rate": 0.00018126,
"loss": 0.0553,
"step": 4690
},
{
"epoch": 0.710990091521065,
"grad_norm": 0.12121643126010895,
"learning_rate": 0.00018122,
"loss": 0.0637,
"step": 4700
},
{
"epoch": 0.7125028363966417,
"grad_norm": 0.6880851984024048,
"learning_rate": 0.00018118000000000002,
"loss": 0.0556,
"step": 4710
},
{
"epoch": 0.7140155812722184,
"grad_norm": 0.17397326231002808,
"learning_rate": 0.00018114,
"loss": 0.0619,
"step": 4720
},
{
"epoch": 0.7155283261477952,
"grad_norm": 0.4361652433872223,
"learning_rate": 0.0001811,
"loss": 0.052,
"step": 4730
},
{
"epoch": 0.7170410710233719,
"grad_norm": 0.08802498877048492,
"learning_rate": 0.00018106,
"loss": 0.0531,
"step": 4740
},
{
"epoch": 0.7185538158989486,
"grad_norm": 0.16508696973323822,
"learning_rate": 0.00018102000000000003,
"loss": 0.0519,
"step": 4750
},
{
"epoch": 0.7200665607745254,
"grad_norm": 0.1359723061323166,
"learning_rate": 0.00018098000000000002,
"loss": 0.0559,
"step": 4760
},
{
"epoch": 0.7215793056501021,
"grad_norm": 0.12716355919837952,
"learning_rate": 0.00018093999999999999,
"loss": 0.0478,
"step": 4770
},
{
"epoch": 0.7230920505256788,
"grad_norm": 0.24563723802566528,
"learning_rate": 0.0001809,
"loss": 0.0508,
"step": 4780
},
{
"epoch": 0.7246047954012556,
"grad_norm": 0.15526343882083893,
"learning_rate": 0.00018086,
"loss": 0.053,
"step": 4790
},
{
"epoch": 0.7261175402768323,
"grad_norm": 0.39961257576942444,
"learning_rate": 0.00018082000000000002,
"loss": 0.0543,
"step": 4800
},
{
"epoch": 0.7261175402768323,
"eval_cer": 0.8969592299120654,
"eval_loss": 0.04724743589758873,
"eval_runtime": 9508.4862,
"eval_samples_per_second": 2.214,
"eval_steps_per_second": 0.277,
"step": 4800
},
{
"epoch": 0.727630285152409,
"grad_norm": 0.11674599349498749,
"learning_rate": 0.00018078000000000001,
"loss": 0.045,
"step": 4810
},
{
"epoch": 0.7291430300279858,
"grad_norm": 0.12775878608226776,
"learning_rate": 0.00018074,
"loss": 0.0507,
"step": 4820
},
{
"epoch": 0.7306557749035625,
"grad_norm": 0.21720856428146362,
"learning_rate": 0.0001807,
"loss": 0.0507,
"step": 4830
},
{
"epoch": 0.7321685197791392,
"grad_norm": 0.09953787177801132,
"learning_rate": 0.00018066,
"loss": 0.0455,
"step": 4840
},
{
"epoch": 0.733681264654716,
"grad_norm": 0.1652969866991043,
"learning_rate": 0.00018062000000000002,
"loss": 0.058,
"step": 4850
},
{
"epoch": 0.7351940095302927,
"grad_norm": 0.15136420726776123,
"learning_rate": 0.00018058,
"loss": 0.0403,
"step": 4860
},
{
"epoch": 0.7367067544058694,
"grad_norm": 0.09294873476028442,
"learning_rate": 0.00018054,
"loss": 0.0454,
"step": 4870
},
{
"epoch": 0.7382194992814461,
"grad_norm": 0.06313528120517731,
"learning_rate": 0.0001805,
"loss": 0.0486,
"step": 4880
},
{
"epoch": 0.7397322441570229,
"grad_norm": 0.10854914039373398,
"learning_rate": 0.00018046000000000002,
"loss": 0.0419,
"step": 4890
},
{
"epoch": 0.7412449890325996,
"grad_norm": 0.08302963525056839,
"learning_rate": 0.00018042,
"loss": 0.0447,
"step": 4900
},
{
"epoch": 0.7427577339081763,
"grad_norm": 0.0761631429195404,
"learning_rate": 0.00018038,
"loss": 0.0446,
"step": 4910
},
{
"epoch": 0.7442704787837531,
"grad_norm": 0.10130470246076584,
"learning_rate": 0.00018034,
"loss": 0.045,
"step": 4920
},
{
"epoch": 0.7457832236593298,
"grad_norm": 0.18436622619628906,
"learning_rate": 0.0001803,
"loss": 0.0429,
"step": 4930
},
{
"epoch": 0.7472959685349065,
"grad_norm": 0.08756496757268906,
"learning_rate": 0.00018026,
"loss": 0.0444,
"step": 4940
},
{
"epoch": 0.7488087134104833,
"grad_norm": 0.0750514343380928,
"learning_rate": 0.00018022,
"loss": 0.0507,
"step": 4950
},
{
"epoch": 0.75032145828606,
"grad_norm": 0.07460404187440872,
"learning_rate": 0.00018018000000000003,
"loss": 0.0397,
"step": 4960
},
{
"epoch": 0.7518342031616367,
"grad_norm": 0.12696300446987152,
"learning_rate": 0.00018014,
"loss": 0.0412,
"step": 4970
},
{
"epoch": 0.7533469480372135,
"grad_norm": 0.09411120414733887,
"learning_rate": 0.00018010000000000001,
"loss": 0.0431,
"step": 4980
},
{
"epoch": 0.7548596929127902,
"grad_norm": 0.08611701428890228,
"learning_rate": 0.00018006,
"loss": 0.041,
"step": 4990
},
{
"epoch": 0.756372437788367,
"grad_norm": 0.07411106675863266,
"learning_rate": 0.00018002,
"loss": 0.0448,
"step": 5000
},
{
"epoch": 0.756372437788367,
"eval_cer": 0.9283299113242558,
"eval_loss": 0.0398402214050293,
"eval_runtime": 9972.2961,
"eval_samples_per_second": 2.111,
"eval_steps_per_second": 0.264,
"step": 5000
},
{
"epoch": 0.7578851826639438,
"grad_norm": 0.06552145630121231,
"learning_rate": 0.00017998000000000002,
"loss": 0.0411,
"step": 5010
},
{
"epoch": 0.7593979275395205,
"grad_norm": 0.14544987678527832,
"learning_rate": 0.00017994000000000002,
"loss": 0.0401,
"step": 5020
},
{
"epoch": 0.7609106724150972,
"grad_norm": 0.06693132221698761,
"learning_rate": 0.0001799,
"loss": 0.045,
"step": 5030
},
{
"epoch": 0.762423417290674,
"grad_norm": 0.08100226521492004,
"learning_rate": 0.00017986,
"loss": 0.0478,
"step": 5040
},
{
"epoch": 0.7639361621662507,
"grad_norm": 0.10020666569471359,
"learning_rate": 0.00017982000000000002,
"loss": 0.0484,
"step": 5050
},
{
"epoch": 0.7654489070418274,
"grad_norm": 0.055785536766052246,
"learning_rate": 0.00017978000000000002,
"loss": 0.0423,
"step": 5060
},
{
"epoch": 0.7669616519174042,
"grad_norm": 0.08791428059339523,
"learning_rate": 0.00017974,
"loss": 0.0433,
"step": 5070
},
{
"epoch": 0.7684743967929809,
"grad_norm": 0.10156507045030594,
"learning_rate": 0.0001797,
"loss": 0.0447,
"step": 5080
},
{
"epoch": 0.7699871416685576,
"grad_norm": 0.1160702183842659,
"learning_rate": 0.00017966,
"loss": 0.0388,
"step": 5090
},
{
"epoch": 0.7714998865441344,
"grad_norm": 0.08716849237680435,
"learning_rate": 0.00017962000000000002,
"loss": 0.0492,
"step": 5100
},
{
"epoch": 0.7730126314197111,
"grad_norm": 0.046968474984169006,
"learning_rate": 0.00017958,
"loss": 0.0434,
"step": 5110
},
{
"epoch": 0.7745253762952878,
"grad_norm": 0.06234806030988693,
"learning_rate": 0.00017954000000000003,
"loss": 0.0504,
"step": 5120
},
{
"epoch": 0.7760381211708646,
"grad_norm": 0.102174311876297,
"learning_rate": 0.0001795,
"loss": 0.044,
"step": 5130
},
{
"epoch": 0.7775508660464413,
"grad_norm": 0.0620570033788681,
"learning_rate": 0.00017946,
"loss": 0.0386,
"step": 5140
},
{
"epoch": 0.779063610922018,
"grad_norm": 0.057656314224004745,
"learning_rate": 0.00017942,
"loss": 0.043,
"step": 5150
},
{
"epoch": 0.7805763557975948,
"grad_norm": 0.08451346307992935,
"learning_rate": 0.00017938,
"loss": 0.0452,
"step": 5160
},
{
"epoch": 0.7820891006731715,
"grad_norm": 0.09557165950536728,
"learning_rate": 0.00017934000000000003,
"loss": 0.0437,
"step": 5170
},
{
"epoch": 0.7836018455487482,
"grad_norm": 0.12275496870279312,
"learning_rate": 0.0001793,
"loss": 0.0427,
"step": 5180
},
{
"epoch": 0.785114590424325,
"grad_norm": 0.3277435600757599,
"learning_rate": 0.00017926000000000002,
"loss": 0.045,
"step": 5190
},
{
"epoch": 0.7866273352999017,
"grad_norm": 0.12806734442710876,
"learning_rate": 0.00017922,
"loss": 0.0383,
"step": 5200
},
{
"epoch": 0.7866273352999017,
"eval_cer": 0.8426215554451947,
"eval_loss": 0.03898792341351509,
"eval_runtime": 10404.4584,
"eval_samples_per_second": 2.023,
"eval_steps_per_second": 0.253,
"step": 5200
},
{
"epoch": 0.7881400801754784,
"grad_norm": 0.07969816774129868,
"learning_rate": 0.00017918,
"loss": 0.0474,
"step": 5210
},
{
"epoch": 0.7896528250510552,
"grad_norm": 0.20492368936538696,
"learning_rate": 0.00017914000000000002,
"loss": 0.0423,
"step": 5220
},
{
"epoch": 0.7911655699266319,
"grad_norm": 0.0960281640291214,
"learning_rate": 0.0001791,
"loss": 0.0392,
"step": 5230
},
{
"epoch": 0.7926783148022086,
"grad_norm": 0.16566351056098938,
"learning_rate": 0.00017906,
"loss": 0.0415,
"step": 5240
},
{
"epoch": 0.7941910596777854,
"grad_norm": 0.12343327701091766,
"learning_rate": 0.00017902,
"loss": 0.0439,
"step": 5250
},
{
"epoch": 0.7957038045533621,
"grad_norm": 0.0732201486825943,
"learning_rate": 0.00017898000000000002,
"loss": 0.0462,
"step": 5260
},
{
"epoch": 0.7972165494289388,
"grad_norm": 0.07991164177656174,
"learning_rate": 0.00017894000000000002,
"loss": 0.0412,
"step": 5270
},
{
"epoch": 0.7987292943045156,
"grad_norm": 0.07868771255016327,
"learning_rate": 0.0001789,
"loss": 0.0458,
"step": 5280
},
{
"epoch": 0.8002420391800923,
"grad_norm": 0.07392987608909607,
"learning_rate": 0.00017886,
"loss": 0.0489,
"step": 5290
},
{
"epoch": 0.801754784055669,
"grad_norm": 0.08330372720956802,
"learning_rate": 0.00017882,
"loss": 0.0448,
"step": 5300
},
{
"epoch": 0.8032675289312458,
"grad_norm": 0.06118497997522354,
"learning_rate": 0.00017878000000000002,
"loss": 0.0406,
"step": 5310
},
{
"epoch": 0.8047802738068225,
"grad_norm": 0.14288772642612457,
"learning_rate": 0.00017874,
"loss": 0.0439,
"step": 5320
},
{
"epoch": 0.8062930186823992,
"grad_norm": 0.06868502497673035,
"learning_rate": 0.0001787,
"loss": 0.0439,
"step": 5330
},
{
"epoch": 0.807805763557976,
"grad_norm": 0.08165542781352997,
"learning_rate": 0.00017866,
"loss": 0.0449,
"step": 5340
},
{
"epoch": 0.8093185084335527,
"grad_norm": 0.08748511224985123,
"learning_rate": 0.00017862000000000002,
"loss": 0.0455,
"step": 5350
},
{
"epoch": 0.8108312533091294,
"grad_norm": 0.0799604058265686,
"learning_rate": 0.00017858000000000001,
"loss": 0.0466,
"step": 5360
},
{
"epoch": 0.8123439981847062,
"grad_norm": 0.09606848657131195,
"learning_rate": 0.00017854,
"loss": 0.0452,
"step": 5370
},
{
"epoch": 0.8138567430602829,
"grad_norm": 0.07232715934515,
"learning_rate": 0.0001785,
"loss": 0.0426,
"step": 5380
},
{
"epoch": 0.8153694879358596,
"grad_norm": 0.07278240472078323,
"learning_rate": 0.00017846,
"loss": 0.0468,
"step": 5390
},
{
"epoch": 0.8168822328114363,
"grad_norm": 0.06568820029497147,
"learning_rate": 0.00017842000000000002,
"loss": 0.0407,
"step": 5400
},
{
"epoch": 0.8168822328114363,
"eval_cer": 0.9304918304165957,
"eval_loss": 0.039248276501894,
"eval_runtime": 10433.9841,
"eval_samples_per_second": 2.018,
"eval_steps_per_second": 0.252,
"step": 5400
},
{
"epoch": 0.8183949776870131,
"grad_norm": 0.08667409420013428,
"learning_rate": 0.00017838,
"loss": 0.0504,
"step": 5410
},
{
"epoch": 0.8199077225625898,
"grad_norm": 0.0701778307557106,
"learning_rate": 0.00017834000000000003,
"loss": 0.0425,
"step": 5420
},
{
"epoch": 0.8214204674381665,
"grad_norm": 0.07078663259744644,
"learning_rate": 0.0001783,
"loss": 0.0456,
"step": 5430
},
{
"epoch": 0.8229332123137433,
"grad_norm": 0.08540530502796173,
"learning_rate": 0.00017826000000000002,
"loss": 0.0437,
"step": 5440
},
{
"epoch": 0.82444595718932,
"grad_norm": 0.044258490204811096,
"learning_rate": 0.00017822,
"loss": 0.0373,
"step": 5450
},
{
"epoch": 0.8259587020648967,
"grad_norm": 0.08837467432022095,
"learning_rate": 0.00017818,
"loss": 0.0418,
"step": 5460
},
{
"epoch": 0.8274714469404735,
"grad_norm": 0.06399261206388474,
"learning_rate": 0.00017814000000000003,
"loss": 0.0461,
"step": 5470
},
{
"epoch": 0.8289841918160502,
"grad_norm": 0.07160426676273346,
"learning_rate": 0.0001781,
"loss": 0.0384,
"step": 5480
},
{
"epoch": 0.8304969366916269,
"grad_norm": 0.06335125118494034,
"learning_rate": 0.00017806,
"loss": 0.04,
"step": 5490
},
{
"epoch": 0.8320096815672037,
"grad_norm": 0.10239727795124054,
"learning_rate": 0.00017802,
"loss": 0.0396,
"step": 5500
},
{
"epoch": 0.8335224264427804,
"grad_norm": 0.06797724217176437,
"learning_rate": 0.00017798,
"loss": 0.0406,
"step": 5510
},
{
"epoch": 0.8350351713183571,
"grad_norm": 0.08448281139135361,
"learning_rate": 0.00017794000000000002,
"loss": 0.0489,
"step": 5520
},
{
"epoch": 0.8365479161939339,
"grad_norm": 0.0817868560552597,
"learning_rate": 0.0001779,
"loss": 0.0437,
"step": 5530
},
{
"epoch": 0.8380606610695106,
"grad_norm": 0.12232506275177002,
"learning_rate": 0.00017786,
"loss": 0.0475,
"step": 5540
},
{
"epoch": 0.8395734059450873,
"grad_norm": 0.0839553102850914,
"learning_rate": 0.00017782,
"loss": 0.0447,
"step": 5550
},
{
"epoch": 0.8410861508206641,
"grad_norm": 0.07315023243427277,
"learning_rate": 0.00017778000000000002,
"loss": 0.0441,
"step": 5560
},
{
"epoch": 0.8425988956962408,
"grad_norm": 0.07943390309810638,
"learning_rate": 0.00017774000000000002,
"loss": 0.0457,
"step": 5570
},
{
"epoch": 0.8441116405718175,
"grad_norm": 0.07185439020395279,
"learning_rate": 0.0001777,
"loss": 0.0429,
"step": 5580
},
{
"epoch": 0.8456243854473943,
"grad_norm": 0.06304585933685303,
"learning_rate": 0.00017766,
"loss": 0.046,
"step": 5590
},
{
"epoch": 0.847137130322971,
"grad_norm": 0.07005342841148376,
"learning_rate": 0.00017762,
"loss": 0.0359,
"step": 5600
},
{
"epoch": 0.847137130322971,
"eval_cer": 0.5003496132017898,
"eval_loss": 0.038213107734918594,
"eval_runtime": 10454.3437,
"eval_samples_per_second": 2.014,
"eval_steps_per_second": 0.252,
"step": 5600
},
{
"epoch": 0.8486498751985477,
"grad_norm": 0.08005109429359436,
"learning_rate": 0.00017758000000000002,
"loss": 0.0491,
"step": 5610
},
{
"epoch": 0.8501626200741245,
"grad_norm": 0.07554598152637482,
"learning_rate": 0.00017754,
"loss": 0.0384,
"step": 5620
},
{
"epoch": 0.8516753649497012,
"grad_norm": 0.08396964520215988,
"learning_rate": 0.0001775,
"loss": 0.0439,
"step": 5630
},
{
"epoch": 0.8531881098252779,
"grad_norm": 0.08719771355390549,
"learning_rate": 0.00017746,
"loss": 0.0417,
"step": 5640
},
{
"epoch": 0.8547008547008547,
"grad_norm": 0.09563528001308441,
"learning_rate": 0.00017742000000000002,
"loss": 0.0456,
"step": 5650
},
{
"epoch": 0.8562135995764314,
"grad_norm": 0.07019315659999847,
"learning_rate": 0.00017738,
"loss": 0.0394,
"step": 5660
},
{
"epoch": 0.8577263444520081,
"grad_norm": 0.06756678968667984,
"learning_rate": 0.00017734,
"loss": 0.046,
"step": 5670
},
{
"epoch": 0.8592390893275849,
"grad_norm": 0.06660816073417664,
"learning_rate": 0.0001773,
"loss": 0.0415,
"step": 5680
},
{
"epoch": 0.8607518342031616,
"grad_norm": 0.10737419873476028,
"learning_rate": 0.00017726,
"loss": 0.0402,
"step": 5690
},
{
"epoch": 0.8622645790787383,
"grad_norm": 0.06818167865276337,
"learning_rate": 0.00017722000000000001,
"loss": 0.039,
"step": 5700
},
{
"epoch": 0.8637773239543151,
"grad_norm": 0.05077315866947174,
"learning_rate": 0.00017718,
"loss": 0.0376,
"step": 5710
},
{
"epoch": 0.8652900688298918,
"grad_norm": 0.08248795568943024,
"learning_rate": 0.00017714000000000003,
"loss": 0.0427,
"step": 5720
},
{
"epoch": 0.8668028137054685,
"grad_norm": 0.06273633241653442,
"learning_rate": 0.0001771,
"loss": 0.0405,
"step": 5730
},
{
"epoch": 0.8683155585810453,
"grad_norm": 0.11920665949583054,
"learning_rate": 0.00017706000000000002,
"loss": 0.0416,
"step": 5740
},
{
"epoch": 0.869828303456622,
"grad_norm": 0.061835162341594696,
"learning_rate": 0.00017702,
"loss": 0.0456,
"step": 5750
},
{
"epoch": 0.8713410483321987,
"grad_norm": 0.06891065835952759,
"learning_rate": 0.00017698,
"loss": 0.0435,
"step": 5760
},
{
"epoch": 0.8728537932077756,
"grad_norm": 0.06323794275522232,
"learning_rate": 0.00017694000000000002,
"loss": 0.0424,
"step": 5770
},
{
"epoch": 0.8743665380833523,
"grad_norm": 0.08218410611152649,
"learning_rate": 0.0001769,
"loss": 0.0428,
"step": 5780
},
{
"epoch": 0.875879282958929,
"grad_norm": 0.05943075567483902,
"learning_rate": 0.00017686,
"loss": 0.0373,
"step": 5790
},
{
"epoch": 0.8773920278345058,
"grad_norm": 0.09316141158342361,
"learning_rate": 0.00017682,
"loss": 0.0436,
"step": 5800
},
{
"epoch": 0.8773920278345058,
"eval_cer": 0.5988355286077488,
"eval_loss": 0.0380551740527153,
"eval_runtime": 10439.6932,
"eval_samples_per_second": 2.017,
"eval_steps_per_second": 0.252,
"step": 5800
},
{
"epoch": 0.8789047727100825,
"grad_norm": 0.06791754812002182,
"learning_rate": 0.00017678000000000003,
"loss": 0.0424,
"step": 5810
},
{
"epoch": 0.8804175175856592,
"grad_norm": 0.06572896242141724,
"learning_rate": 0.00017674000000000002,
"loss": 0.0446,
"step": 5820
},
{
"epoch": 0.881930262461236,
"grad_norm": 0.07208286970853806,
"learning_rate": 0.00017669999999999999,
"loss": 0.0438,
"step": 5830
},
{
"epoch": 0.8834430073368127,
"grad_norm": 0.08518756181001663,
"learning_rate": 0.00017666,
"loss": 0.0401,
"step": 5840
},
{
"epoch": 0.8849557522123894,
"grad_norm": 0.060736026614904404,
"learning_rate": 0.00017662,
"loss": 0.0393,
"step": 5850
},
{
"epoch": 0.8864684970879662,
"grad_norm": 0.0627061128616333,
"learning_rate": 0.00017658000000000002,
"loss": 0.0358,
"step": 5860
},
{
"epoch": 0.8879812419635429,
"grad_norm": 0.06178157031536102,
"learning_rate": 0.00017654000000000001,
"loss": 0.0467,
"step": 5870
},
{
"epoch": 0.8894939868391196,
"grad_norm": 0.0688227042555809,
"learning_rate": 0.0001765,
"loss": 0.0415,
"step": 5880
},
{
"epoch": 0.8910067317146964,
"grad_norm": 0.06773985177278519,
"learning_rate": 0.00017646,
"loss": 0.0354,
"step": 5890
},
{
"epoch": 0.8925194765902731,
"grad_norm": 0.09130257368087769,
"learning_rate": 0.00017642,
"loss": 0.0414,
"step": 5900
},
{
"epoch": 0.8940322214658498,
"grad_norm": 0.06815651059150696,
"learning_rate": 0.00017638000000000002,
"loss": 0.0495,
"step": 5910
},
{
"epoch": 0.8955449663414266,
"grad_norm": 0.07239062339067459,
"learning_rate": 0.00017634,
"loss": 0.0459,
"step": 5920
},
{
"epoch": 0.8970577112170033,
"grad_norm": 0.08951979130506516,
"learning_rate": 0.0001763,
"loss": 0.047,
"step": 5930
},
{
"epoch": 0.89857045609258,
"grad_norm": 0.07267329841852188,
"learning_rate": 0.00017626,
"loss": 0.0384,
"step": 5940
},
{
"epoch": 0.9000832009681567,
"grad_norm": 0.06272245943546295,
"learning_rate": 0.00017622000000000002,
"loss": 0.0373,
"step": 5950
},
{
"epoch": 0.9015959458437335,
"grad_norm": 0.07484642416238785,
"learning_rate": 0.00017618,
"loss": 0.0445,
"step": 5960
},
{
"epoch": 0.9031086907193102,
"grad_norm": 0.06894571334123611,
"learning_rate": 0.00017614,
"loss": 0.0418,
"step": 5970
},
{
"epoch": 0.904621435594887,
"grad_norm": 0.07352825254201889,
"learning_rate": 0.0001761,
"loss": 0.0361,
"step": 5980
},
{
"epoch": 0.9061341804704637,
"grad_norm": 0.07955580949783325,
"learning_rate": 0.00017606,
"loss": 0.0418,
"step": 5990
},
{
"epoch": 0.9076469253460404,
"grad_norm": 0.057830698788166046,
"learning_rate": 0.00017602,
"loss": 0.0359,
"step": 6000
},
{
"epoch": 0.9076469253460404,
"eval_cer": 0.5058427407698408,
"eval_loss": 0.038296379148960114,
"eval_runtime": 10426.1739,
"eval_samples_per_second": 2.019,
"eval_steps_per_second": 0.252,
"step": 6000
},
{
"epoch": 0.9091596702216171,
"grad_norm": 0.08560307323932648,
"learning_rate": 0.00017598,
"loss": 0.0465,
"step": 6010
},
{
"epoch": 0.9106724150971939,
"grad_norm": 0.06908106803894043,
"learning_rate": 0.00017594000000000003,
"loss": 0.0469,
"step": 6020
},
{
"epoch": 0.9121851599727706,
"grad_norm": 0.058405641466379166,
"learning_rate": 0.0001759,
"loss": 0.0459,
"step": 6030
},
{
"epoch": 0.9136979048483473,
"grad_norm": 0.06696103513240814,
"learning_rate": 0.00017586000000000001,
"loss": 0.0389,
"step": 6040
},
{
"epoch": 0.9152106497239241,
"grad_norm": 0.06927672773599625,
"learning_rate": 0.00017582,
"loss": 0.0369,
"step": 6050
},
{
"epoch": 0.9167233945995008,
"grad_norm": 0.11847919970750809,
"learning_rate": 0.00017578,
"loss": 0.0379,
"step": 6060
},
{
"epoch": 0.9182361394750775,
"grad_norm": 0.06731213629245758,
"learning_rate": 0.00017574000000000002,
"loss": 0.0492,
"step": 6070
},
{
"epoch": 0.9197488843506543,
"grad_norm": 0.06238566339015961,
"learning_rate": 0.0001757,
"loss": 0.0351,
"step": 6080
},
{
"epoch": 0.921261629226231,
"grad_norm": 0.07023432850837708,
"learning_rate": 0.00017566,
"loss": 0.0418,
"step": 6090
},
{
"epoch": 0.9227743741018077,
"grad_norm": 0.07269687950611115,
"learning_rate": 0.00017562,
"loss": 0.0473,
"step": 6100
},
{
"epoch": 0.9242871189773845,
"grad_norm": 0.0714830756187439,
"learning_rate": 0.00017558000000000002,
"loss": 0.0419,
"step": 6110
},
{
"epoch": 0.9257998638529612,
"grad_norm": 0.06455916166305542,
"learning_rate": 0.00017554000000000002,
"loss": 0.0386,
"step": 6120
},
{
"epoch": 0.9273126087285379,
"grad_norm": 0.0797223374247551,
"learning_rate": 0.0001755,
"loss": 0.0425,
"step": 6130
},
{
"epoch": 0.9288253536041147,
"grad_norm": 0.08360251039266586,
"learning_rate": 0.00017546,
"loss": 0.0414,
"step": 6140
},
{
"epoch": 0.9303380984796914,
"grad_norm": 0.06491956114768982,
"learning_rate": 0.00017542,
"loss": 0.0367,
"step": 6150
},
{
"epoch": 0.9318508433552681,
"grad_norm": 0.06236764043569565,
"learning_rate": 0.00017538000000000002,
"loss": 0.0514,
"step": 6160
},
{
"epoch": 0.9333635882308449,
"grad_norm": 0.08555632829666138,
"learning_rate": 0.00017534,
"loss": 0.041,
"step": 6170
},
{
"epoch": 0.9348763331064216,
"grad_norm": 0.08949322998523712,
"learning_rate": 0.0001753,
"loss": 0.0462,
"step": 6180
},
{
"epoch": 0.9363890779819983,
"grad_norm": 0.07832244038581848,
"learning_rate": 0.00017526,
"loss": 0.0471,
"step": 6190
},
{
"epoch": 0.9379018228575751,
"grad_norm": 0.06077546253800392,
"learning_rate": 0.00017522000000000002,
"loss": 0.0457,
"step": 6200
},
{
"epoch": 0.9379018228575751,
"eval_cer": 0.3344013213649492,
"eval_loss": 0.03830147907137871,
"eval_runtime": 10461.8882,
"eval_samples_per_second": 2.012,
"eval_steps_per_second": 0.252,
"step": 6200
},
{
"epoch": 0.9394145677331518,
"grad_norm": 0.048287175595760345,
"learning_rate": 0.00017518,
"loss": 0.0393,
"step": 6210
},
{
"epoch": 0.9409273126087285,
"grad_norm": 0.08072841167449951,
"learning_rate": 0.00017514,
"loss": 0.0447,
"step": 6220
},
{
"epoch": 0.9424400574843053,
"grad_norm": 0.07255307585000992,
"learning_rate": 0.0001751,
"loss": 0.0492,
"step": 6230
},
{
"epoch": 0.943952802359882,
"grad_norm": 0.05136171355843544,
"learning_rate": 0.00017506,
"loss": 0.0438,
"step": 6240
},
{
"epoch": 0.9454655472354587,
"grad_norm": 0.079404316842556,
"learning_rate": 0.00017502000000000001,
"loss": 0.0383,
"step": 6250
},
{
"epoch": 0.9469782921110355,
"grad_norm": 0.10744167119264603,
"learning_rate": 0.00017498,
"loss": 0.0406,
"step": 6260
},
{
"epoch": 0.9484910369866122,
"grad_norm": 0.09439695626497269,
"learning_rate": 0.00017494,
"loss": 0.0448,
"step": 6270
},
{
"epoch": 0.9500037818621889,
"grad_norm": 0.07746788114309311,
"learning_rate": 0.00017490000000000002,
"loss": 0.0425,
"step": 6280
},
{
"epoch": 0.9515165267377657,
"grad_norm": 0.161416694521904,
"learning_rate": 0.00017486,
"loss": 0.04,
"step": 6290
},
{
"epoch": 0.9530292716133424,
"grad_norm": 0.05279407650232315,
"learning_rate": 0.00017482,
"loss": 0.0387,
"step": 6300
},
{
"epoch": 0.9545420164889191,
"grad_norm": 0.06324402987957001,
"learning_rate": 0.00017478,
"loss": 0.0425,
"step": 6310
},
{
"epoch": 0.9560547613644959,
"grad_norm": 0.08716294914484024,
"learning_rate": 0.00017474000000000002,
"loss": 0.0436,
"step": 6320
},
{
"epoch": 0.9575675062400726,
"grad_norm": 0.08212625980377197,
"learning_rate": 0.00017470000000000002,
"loss": 0.0445,
"step": 6330
},
{
"epoch": 0.9590802511156493,
"grad_norm": 0.08856002241373062,
"learning_rate": 0.00017466,
"loss": 0.0385,
"step": 6340
},
{
"epoch": 0.960592995991226,
"grad_norm": 0.08907803148031235,
"learning_rate": 0.00017462,
"loss": 0.0451,
"step": 6350
},
{
"epoch": 0.9621057408668028,
"grad_norm": 0.053175825625658035,
"learning_rate": 0.00017458,
"loss": 0.0428,
"step": 6360
},
{
"epoch": 0.9636184857423795,
"grad_norm": 0.055600494146347046,
"learning_rate": 0.00017454000000000002,
"loss": 0.047,
"step": 6370
},
{
"epoch": 0.9651312306179562,
"grad_norm": 0.10455228388309479,
"learning_rate": 0.0001745,
"loss": 0.0517,
"step": 6380
},
{
"epoch": 0.966643975493533,
"grad_norm": 0.11780910938978195,
"learning_rate": 0.00017446,
"loss": 0.0414,
"step": 6390
},
{
"epoch": 0.9681567203691097,
"grad_norm": 0.12388743460178375,
"learning_rate": 0.00017442,
"loss": 0.0438,
"step": 6400
},
{
"epoch": 0.9681567203691097,
"eval_cer": 0.5869913004375724,
"eval_loss": 0.03873522952198982,
"eval_runtime": 10437.6142,
"eval_samples_per_second": 2.017,
"eval_steps_per_second": 0.252,
"step": 6400
},
{
"epoch": 0.9696694652446864,
"grad_norm": 0.07916050404310226,
"learning_rate": 0.00017438000000000002,
"loss": 0.0402,
"step": 6410
},
{
"epoch": 0.9711822101202632,
"grad_norm": 0.05646761879324913,
"learning_rate": 0.00017434000000000001,
"loss": 0.0425,
"step": 6420
},
{
"epoch": 0.9726949549958399,
"grad_norm": 0.08374381810426712,
"learning_rate": 0.0001743,
"loss": 0.041,
"step": 6430
},
{
"epoch": 0.9742076998714166,
"grad_norm": 0.06789222359657288,
"learning_rate": 0.00017426,
"loss": 0.0391,
"step": 6440
},
{
"epoch": 0.9757204447469934,
"grad_norm": 0.0788172036409378,
"learning_rate": 0.00017422,
"loss": 0.0449,
"step": 6450
},
{
"epoch": 0.9772331896225701,
"grad_norm": 0.1257173717021942,
"learning_rate": 0.00017418000000000002,
"loss": 0.0484,
"step": 6460
},
{
"epoch": 0.9787459344981468,
"grad_norm": 0.05888710170984268,
"learning_rate": 0.00017414,
"loss": 0.0387,
"step": 6470
},
{
"epoch": 0.9802586793737236,
"grad_norm": 0.07102910429239273,
"learning_rate": 0.00017410000000000003,
"loss": 0.0386,
"step": 6480
},
{
"epoch": 0.9817714242493003,
"grad_norm": 0.058048397302627563,
"learning_rate": 0.00017406,
"loss": 0.0415,
"step": 6490
},
{
"epoch": 0.983284169124877,
"grad_norm": 0.07222626358270645,
"learning_rate": 0.00017402000000000002,
"loss": 0.0378,
"step": 6500
},
{
"epoch": 0.9847969140004538,
"grad_norm": 0.06445878744125366,
"learning_rate": 0.00017398,
"loss": 0.0409,
"step": 6510
},
{
"epoch": 0.9863096588760305,
"grad_norm": 0.09191201627254486,
"learning_rate": 0.00017394,
"loss": 0.0414,
"step": 6520
},
{
"epoch": 0.9878224037516072,
"grad_norm": 0.08073204010725021,
"learning_rate": 0.00017390000000000003,
"loss": 0.0404,
"step": 6530
},
{
"epoch": 0.9893351486271841,
"grad_norm": 0.08427068591117859,
"learning_rate": 0.00017386,
"loss": 0.0398,
"step": 6540
},
{
"epoch": 0.9908478935027608,
"grad_norm": 0.19870494306087494,
"learning_rate": 0.00017382,
"loss": 0.0388,
"step": 6550
},
{
"epoch": 0.9923606383783375,
"grad_norm": 0.34985288977622986,
"learning_rate": 0.00017378,
"loss": 0.051,
"step": 6560
},
{
"epoch": 0.9938733832539143,
"grad_norm": 0.12121633440256119,
"learning_rate": 0.00017374000000000003,
"loss": 0.0385,
"step": 6570
},
{
"epoch": 0.995386128129491,
"grad_norm": 0.140520840883255,
"learning_rate": 0.00017370000000000002,
"loss": 0.0417,
"step": 6580
},
{
"epoch": 0.9968988730050677,
"grad_norm": 0.06655796617269516,
"learning_rate": 0.00017366,
"loss": 0.0394,
"step": 6590
},
{
"epoch": 0.9984116178806445,
"grad_norm": 0.07498542964458466,
"learning_rate": 0.00017362,
"loss": 0.0419,
"step": 6600
},
{
"epoch": 0.9984116178806445,
"eval_cer": 0.25282902555511905,
"eval_loss": 0.038411665707826614,
"eval_runtime": 10433.3935,
"eval_samples_per_second": 2.018,
"eval_steps_per_second": 0.252,
"step": 6600
},
{
"epoch": 0.9999243627562212,
"grad_norm": 0.25646254420280457,
"learning_rate": 0.00017358,
"loss": 0.039,
"step": 6610
},
{
"epoch": 1.0014371076317978,
"grad_norm": 0.07744245231151581,
"learning_rate": 0.00017354000000000002,
"loss": 0.0371,
"step": 6620
},
{
"epoch": 1.0029498525073746,
"grad_norm": 0.11968632787466049,
"learning_rate": 0.00017350000000000002,
"loss": 0.0303,
"step": 6630
},
{
"epoch": 1.0044625973829513,
"grad_norm": 0.07235859334468842,
"learning_rate": 0.00017346,
"loss": 0.0387,
"step": 6640
},
{
"epoch": 1.005975342258528,
"grad_norm": 0.12598702311515808,
"learning_rate": 0.00017342,
"loss": 0.0355,
"step": 6650
},
{
"epoch": 1.0074880871341048,
"grad_norm": 0.10832694917917252,
"learning_rate": 0.00017338,
"loss": 0.0297,
"step": 6660
},
{
"epoch": 1.0090008320096815,
"grad_norm": 0.13988302648067474,
"learning_rate": 0.00017334000000000002,
"loss": 0.0352,
"step": 6670
},
{
"epoch": 1.0105135768852582,
"grad_norm": 0.09534142911434174,
"learning_rate": 0.0001733,
"loss": 0.0308,
"step": 6680
},
{
"epoch": 1.012026321760835,
"grad_norm": 0.05622931197285652,
"learning_rate": 0.00017326,
"loss": 0.0311,
"step": 6690
},
{
"epoch": 1.0135390666364117,
"grad_norm": 0.06480368971824646,
"learning_rate": 0.00017322,
"loss": 0.033,
"step": 6700
},
{
"epoch": 1.0150518115119884,
"grad_norm": 0.08531224727630615,
"learning_rate": 0.00017318000000000002,
"loss": 0.0345,
"step": 6710
},
{
"epoch": 1.0165645563875652,
"grad_norm": 0.11494185030460358,
"learning_rate": 0.00017314,
"loss": 0.0292,
"step": 6720
},
{
"epoch": 1.0180773012631419,
"grad_norm": 0.06993953883647919,
"learning_rate": 0.0001731,
"loss": 0.0343,
"step": 6730
},
{
"epoch": 1.0195900461387186,
"grad_norm": 0.09449311345815659,
"learning_rate": 0.00017306,
"loss": 0.0285,
"step": 6740
},
{
"epoch": 1.0211027910142954,
"grad_norm": 0.10550418496131897,
"learning_rate": 0.00017302,
"loss": 0.0337,
"step": 6750
},
{
"epoch": 1.022615535889872,
"grad_norm": 0.06987041234970093,
"learning_rate": 0.00017298000000000001,
"loss": 0.0273,
"step": 6760
},
{
"epoch": 1.0241282807654488,
"grad_norm": 0.08014168590307236,
"learning_rate": 0.00017294,
"loss": 0.0318,
"step": 6770
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.04886119067668915,
"learning_rate": 0.00017290000000000003,
"loss": 0.0318,
"step": 6780
},
{
"epoch": 1.0271537705166023,
"grad_norm": 0.07735268771648407,
"learning_rate": 0.00017286,
"loss": 0.0377,
"step": 6790
},
{
"epoch": 1.028666515392179,
"grad_norm": 0.07365155220031738,
"learning_rate": 0.00017282000000000002,
"loss": 0.0397,
"step": 6800
},
{
"epoch": 1.028666515392179,
"eval_cer": 0.5956908628651482,
"eval_loss": 0.03884879872202873,
"eval_runtime": 10443.3198,
"eval_samples_per_second": 2.016,
"eval_steps_per_second": 0.252,
"step": 6800
},
{
"epoch": 1.0301792602677557,
"grad_norm": 0.08235965669155121,
"learning_rate": 0.00017278,
"loss": 0.0356,
"step": 6810
},
{
"epoch": 1.0316920051433325,
"grad_norm": 0.1203494668006897,
"learning_rate": 0.00017274,
"loss": 0.0391,
"step": 6820
},
{
"epoch": 1.0332047500189092,
"grad_norm": 0.059709157794713974,
"learning_rate": 0.00017270000000000002,
"loss": 0.036,
"step": 6830
},
{
"epoch": 1.034717494894486,
"grad_norm": 0.08380923420190811,
"learning_rate": 0.00017266,
"loss": 0.0311,
"step": 6840
},
{
"epoch": 1.0362302397700627,
"grad_norm": 0.0642111599445343,
"learning_rate": 0.00017262,
"loss": 0.0296,
"step": 6850
},
{
"epoch": 1.0377429846456394,
"grad_norm": 0.07701337337493896,
"learning_rate": 0.00017258,
"loss": 0.0318,
"step": 6860
},
{
"epoch": 1.0392557295212161,
"grad_norm": 0.09674856811761856,
"learning_rate": 0.00017254000000000003,
"loss": 0.0294,
"step": 6870
},
{
"epoch": 1.0407684743967929,
"grad_norm": 0.08543815463781357,
"learning_rate": 0.00017250000000000002,
"loss": 0.0322,
"step": 6880
},
{
"epoch": 1.0422812192723696,
"grad_norm": 0.08181754499673843,
"learning_rate": 0.00017246,
"loss": 0.031,
"step": 6890
},
{
"epoch": 1.0437939641479463,
"grad_norm": 0.07326922565698624,
"learning_rate": 0.00017242,
"loss": 0.0298,
"step": 6900
},
{
"epoch": 1.045306709023523,
"grad_norm": 0.060128018260002136,
"learning_rate": 0.00017238,
"loss": 0.0351,
"step": 6910
},
{
"epoch": 1.0468194538990998,
"grad_norm": 0.055250383913517,
"learning_rate": 0.00017234000000000002,
"loss": 0.0322,
"step": 6920
},
{
"epoch": 1.0483321987746765,
"grad_norm": 0.07841707766056061,
"learning_rate": 0.00017230000000000001,
"loss": 0.0311,
"step": 6930
},
{
"epoch": 1.0498449436502533,
"grad_norm": 0.06094701215624809,
"learning_rate": 0.00017226,
"loss": 0.0331,
"step": 6940
},
{
"epoch": 1.0513576885258302,
"grad_norm": 0.0738435760140419,
"learning_rate": 0.00017222,
"loss": 0.0385,
"step": 6950
},
{
"epoch": 1.052870433401407,
"grad_norm": 0.0741799846291542,
"learning_rate": 0.00017218,
"loss": 0.0332,
"step": 6960
},
{
"epoch": 1.0543831782769837,
"grad_norm": 0.11769600957632065,
"learning_rate": 0.00017214000000000002,
"loss": 0.0288,
"step": 6970
},
{
"epoch": 1.0558959231525604,
"grad_norm": 0.05547551065683365,
"learning_rate": 0.0001721,
"loss": 0.0351,
"step": 6980
},
{
"epoch": 1.0574086680281372,
"grad_norm": 0.059602439403533936,
"learning_rate": 0.00017206,
"loss": 0.0315,
"step": 6990
},
{
"epoch": 1.0589214129037139,
"grad_norm": 0.07523063570261002,
"learning_rate": 0.00017202,
"loss": 0.0344,
"step": 7000
},
{
"epoch": 1.0589214129037139,
"eval_cer": 0.06192848124566072,
"eval_loss": 0.03872867301106453,
"eval_runtime": 10423.0915,
"eval_samples_per_second": 2.02,
"eval_steps_per_second": 0.253,
"step": 7000
},
{
"epoch": 1.0604341577792906,
"grad_norm": 0.07334991544485092,
"learning_rate": 0.00017198000000000002,
"loss": 0.0394,
"step": 7010
},
{
"epoch": 1.0619469026548674,
"grad_norm": 0.08875437080860138,
"learning_rate": 0.00017194,
"loss": 0.0316,
"step": 7020
},
{
"epoch": 1.063459647530444,
"grad_norm": 0.06492207199335098,
"learning_rate": 0.0001719,
"loss": 0.0375,
"step": 7030
},
{
"epoch": 1.0649723924060208,
"grad_norm": 0.08707519620656967,
"learning_rate": 0.00017186,
"loss": 0.0333,
"step": 7040
},
{
"epoch": 1.0664851372815976,
"grad_norm": 0.06477733701467514,
"learning_rate": 0.00017182,
"loss": 0.036,
"step": 7050
},
{
"epoch": 1.0679978821571743,
"grad_norm": 0.05914880335330963,
"learning_rate": 0.00017178,
"loss": 0.0307,
"step": 7060
},
{
"epoch": 1.069510627032751,
"grad_norm": 0.11167873442173004,
"learning_rate": 0.00017174,
"loss": 0.0355,
"step": 7070
},
{
"epoch": 1.0710233719083277,
"grad_norm": 0.08664342761039734,
"learning_rate": 0.00017170000000000003,
"loss": 0.0373,
"step": 7080
},
{
"epoch": 1.0725361167839045,
"grad_norm": 0.06912154704332352,
"learning_rate": 0.00017166,
"loss": 0.0283,
"step": 7090
},
{
"epoch": 1.0740488616594812,
"grad_norm": 0.09120757132768631,
"learning_rate": 0.00017162000000000001,
"loss": 0.0313,
"step": 7100
},
{
"epoch": 1.075561606535058,
"grad_norm": 0.08159112185239792,
"learning_rate": 0.00017158,
"loss": 0.0413,
"step": 7110
},
{
"epoch": 1.0770743514106347,
"grad_norm": 0.095944344997406,
"learning_rate": 0.00017154,
"loss": 0.0355,
"step": 7120
},
{
"epoch": 1.0785870962862114,
"grad_norm": 0.10682930797338486,
"learning_rate": 0.00017150000000000002,
"loss": 0.0278,
"step": 7130
},
{
"epoch": 1.0800998411617881,
"grad_norm": 0.06514004617929459,
"learning_rate": 0.00017146,
"loss": 0.0306,
"step": 7140
},
{
"epoch": 1.0816125860373649,
"grad_norm": 0.07849156856536865,
"learning_rate": 0.00017142,
"loss": 0.0379,
"step": 7150
},
{
"epoch": 1.0831253309129416,
"grad_norm": 0.0788741260766983,
"learning_rate": 0.00017138,
"loss": 0.032,
"step": 7160
},
{
"epoch": 1.0846380757885183,
"grad_norm": 0.10495191067457199,
"learning_rate": 0.00017134000000000002,
"loss": 0.0358,
"step": 7170
},
{
"epoch": 1.086150820664095,
"grad_norm": 0.07463409751653671,
"learning_rate": 0.00017130000000000002,
"loss": 0.0356,
"step": 7180
},
{
"epoch": 1.0876635655396718,
"grad_norm": 0.08425049483776093,
"learning_rate": 0.00017126,
"loss": 0.0327,
"step": 7190
},
{
"epoch": 1.0891763104152485,
"grad_norm": 0.07767146825790405,
"learning_rate": 0.00017122,
"loss": 0.034,
"step": 7200
},
{
"epoch": 1.0891763104152485,
"eval_cer": 0.09758161553419167,
"eval_loss": 0.037929706275463104,
"eval_runtime": 10420.1284,
"eval_samples_per_second": 2.02,
"eval_steps_per_second": 0.253,
"step": 7200
},
{
"epoch": 1.0906890552908253,
"grad_norm": 0.07770776748657227,
"learning_rate": 0.00017118,
"loss": 0.0321,
"step": 7210
},
{
"epoch": 1.092201800166402,
"grad_norm": 0.06977003812789917,
"learning_rate": 0.00017114000000000002,
"loss": 0.0315,
"step": 7220
},
{
"epoch": 1.0937145450419787,
"grad_norm": 0.077842116355896,
"learning_rate": 0.0001711,
"loss": 0.0317,
"step": 7230
},
{
"epoch": 1.0952272899175555,
"grad_norm": 0.11414997279644012,
"learning_rate": 0.00017106,
"loss": 0.0392,
"step": 7240
},
{
"epoch": 1.0967400347931322,
"grad_norm": 0.07568582892417908,
"learning_rate": 0.00017102,
"loss": 0.0369,
"step": 7250
},
{
"epoch": 1.098252779668709,
"grad_norm": 0.07864728569984436,
"learning_rate": 0.00017098000000000002,
"loss": 0.038,
"step": 7260
},
{
"epoch": 1.0997655245442857,
"grad_norm": 0.0852401927113533,
"learning_rate": 0.00017094,
"loss": 0.0323,
"step": 7270
},
{
"epoch": 1.1012782694198624,
"grad_norm": 0.06548303365707397,
"learning_rate": 0.0001709,
"loss": 0.0373,
"step": 7280
},
{
"epoch": 1.1027910142954391,
"grad_norm": 0.10153812170028687,
"learning_rate": 0.00017086,
"loss": 0.0321,
"step": 7290
},
{
"epoch": 1.1043037591710159,
"grad_norm": 0.09032442420721054,
"learning_rate": 0.00017082,
"loss": 0.0306,
"step": 7300
},
{
"epoch": 1.1058165040465926,
"grad_norm": 0.12109789252281189,
"learning_rate": 0.00017078000000000001,
"loss": 0.0355,
"step": 7310
},
{
"epoch": 1.1073292489221693,
"grad_norm": 0.08515240997076035,
"learning_rate": 0.00017074,
"loss": 0.0374,
"step": 7320
},
{
"epoch": 1.108841993797746,
"grad_norm": 0.06838446855545044,
"learning_rate": 0.0001707,
"loss": 0.0309,
"step": 7330
},
{
"epoch": 1.1103547386733228,
"grad_norm": 0.10029911994934082,
"learning_rate": 0.00017066,
"loss": 0.0377,
"step": 7340
},
{
"epoch": 1.1118674835488995,
"grad_norm": 0.08499938994646072,
"learning_rate": 0.00017062,
"loss": 0.0317,
"step": 7350
},
{
"epoch": 1.1133802284244763,
"grad_norm": 0.10972133278846741,
"learning_rate": 0.00017058,
"loss": 0.0344,
"step": 7360
},
{
"epoch": 1.114892973300053,
"grad_norm": 0.06848263740539551,
"learning_rate": 0.00017054,
"loss": 0.0356,
"step": 7370
},
{
"epoch": 1.1164057181756297,
"grad_norm": 0.06813491135835648,
"learning_rate": 0.00017050000000000002,
"loss": 0.0291,
"step": 7380
},
{
"epoch": 1.1179184630512065,
"grad_norm": 0.053215883672237396,
"learning_rate": 0.00017046,
"loss": 0.0297,
"step": 7390
},
{
"epoch": 1.1194312079267832,
"grad_norm": 0.08575928211212158,
"learning_rate": 0.00017042,
"loss": 0.0378,
"step": 7400
},
{
"epoch": 1.1194312079267832,
"eval_cer": 0.05163898174846133,
"eval_loss": 0.03768303617835045,
"eval_runtime": 10418.7834,
"eval_samples_per_second": 2.021,
"eval_steps_per_second": 0.253,
"step": 7400
},
{
"epoch": 1.12094395280236,
"grad_norm": 0.07621601223945618,
"learning_rate": 0.00017038,
"loss": 0.032,
"step": 7410
},
{
"epoch": 1.1224566976779367,
"grad_norm": 0.11499703675508499,
"learning_rate": 0.00017034,
"loss": 0.0331,
"step": 7420
},
{
"epoch": 1.1239694425535134,
"grad_norm": 0.08789568394422531,
"learning_rate": 0.00017030000000000002,
"loss": 0.0332,
"step": 7430
},
{
"epoch": 1.1254821874290901,
"grad_norm": 0.0887342318892479,
"learning_rate": 0.00017025999999999999,
"loss": 0.0374,
"step": 7440
},
{
"epoch": 1.1269949323046669,
"grad_norm": 0.11794856935739517,
"learning_rate": 0.00017022,
"loss": 0.0347,
"step": 7450
},
{
"epoch": 1.1285076771802436,
"grad_norm": 0.07593784481287003,
"learning_rate": 0.00017018,
"loss": 0.0323,
"step": 7460
},
{
"epoch": 1.1300204220558203,
"grad_norm": 0.06868909299373627,
"learning_rate": 0.00017014000000000002,
"loss": 0.0311,
"step": 7470
},
{
"epoch": 1.131533166931397,
"grad_norm": 0.1010032370686531,
"learning_rate": 0.00017010000000000001,
"loss": 0.0333,
"step": 7480
},
{
"epoch": 1.1330459118069738,
"grad_norm": 0.08664656430482864,
"learning_rate": 0.00017006,
"loss": 0.0358,
"step": 7490
},
{
"epoch": 1.1345586566825505,
"grad_norm": 0.09153386205434799,
"learning_rate": 0.00017002,
"loss": 0.0288,
"step": 7500
},
{
"epoch": 1.1360714015581272,
"grad_norm": 0.10042116045951843,
"learning_rate": 0.00016998,
"loss": 0.0324,
"step": 7510
},
{
"epoch": 1.137584146433704,
"grad_norm": 0.09703629463911057,
"learning_rate": 0.00016994000000000002,
"loss": 0.0356,
"step": 7520
},
{
"epoch": 1.1390968913092807,
"grad_norm": 0.07961410284042358,
"learning_rate": 0.0001699,
"loss": 0.0279,
"step": 7530
},
{
"epoch": 1.1406096361848574,
"grad_norm": 0.09164062142372131,
"learning_rate": 0.00016986000000000003,
"loss": 0.033,
"step": 7540
},
{
"epoch": 1.1421223810604342,
"grad_norm": 0.0804910659790039,
"learning_rate": 0.00016982,
"loss": 0.033,
"step": 7550
},
{
"epoch": 1.143635125936011,
"grad_norm": 0.07923970371484756,
"learning_rate": 0.00016978000000000002,
"loss": 0.0366,
"step": 7560
},
{
"epoch": 1.1451478708115876,
"grad_norm": 0.1198810487985611,
"learning_rate": 0.00016974,
"loss": 0.0361,
"step": 7570
},
{
"epoch": 1.1466606156871644,
"grad_norm": 0.08409520238637924,
"learning_rate": 0.0001697,
"loss": 0.0323,
"step": 7580
},
{
"epoch": 1.148173360562741,
"grad_norm": 0.09524326026439667,
"learning_rate": 0.00016966000000000003,
"loss": 0.0338,
"step": 7590
},
{
"epoch": 1.1496861054383178,
"grad_norm": 0.0670013502240181,
"learning_rate": 0.00016962,
"loss": 0.033,
"step": 7600
},
{
"epoch": 1.1496861054383178,
"eval_cer": 0.04317970118571997,
"eval_loss": 0.03775278851389885,
"eval_runtime": 10413.2831,
"eval_samples_per_second": 2.022,
"eval_steps_per_second": 0.253,
"step": 7600
},
{
"epoch": 1.1511988503138946,
"grad_norm": 0.07331959903240204,
"learning_rate": 0.00016958,
"loss": 0.0331,
"step": 7610
},
{
"epoch": 1.1527115951894713,
"grad_norm": 0.06851343810558319,
"learning_rate": 0.00016954,
"loss": 0.0306,
"step": 7620
},
{
"epoch": 1.154224340065048,
"grad_norm": 0.07627418637275696,
"learning_rate": 0.00016950000000000003,
"loss": 0.0334,
"step": 7630
},
{
"epoch": 1.1557370849406248,
"grad_norm": 0.08676694333553314,
"learning_rate": 0.00016946000000000002,
"loss": 0.0322,
"step": 7640
},
{
"epoch": 1.1572498298162015,
"grad_norm": 0.07023747265338898,
"learning_rate": 0.00016942000000000001,
"loss": 0.0358,
"step": 7650
},
{
"epoch": 1.1587625746917782,
"grad_norm": 0.07805462926626205,
"learning_rate": 0.00016938,
"loss": 0.0325,
"step": 7660
},
{
"epoch": 1.160275319567355,
"grad_norm": 0.0867529958486557,
"learning_rate": 0.00016934,
"loss": 0.0318,
"step": 7670
},
{
"epoch": 1.1617880644429317,
"grad_norm": 0.08449842035770416,
"learning_rate": 0.00016930000000000002,
"loss": 0.0408,
"step": 7680
},
{
"epoch": 1.1633008093185084,
"grad_norm": 0.08054087311029434,
"learning_rate": 0.00016926000000000002,
"loss": 0.0306,
"step": 7690
},
{
"epoch": 1.1648135541940852,
"grad_norm": 0.08645962178707123,
"learning_rate": 0.00016922,
"loss": 0.0299,
"step": 7700
},
{
"epoch": 1.166326299069662,
"grad_norm": 0.0892554521560669,
"learning_rate": 0.00016918,
"loss": 0.0352,
"step": 7710
},
{
"epoch": 1.1678390439452386,
"grad_norm": 0.06643500924110413,
"learning_rate": 0.00016914,
"loss": 0.0284,
"step": 7720
},
{
"epoch": 1.1693517888208154,
"grad_norm": 0.06918591260910034,
"learning_rate": 0.00016910000000000002,
"loss": 0.0278,
"step": 7730
},
{
"epoch": 1.170864533696392,
"grad_norm": 0.08370740711688995,
"learning_rate": 0.00016906,
"loss": 0.0316,
"step": 7740
},
{
"epoch": 1.1723772785719688,
"grad_norm": 0.053777385503053665,
"learning_rate": 0.00016902,
"loss": 0.036,
"step": 7750
},
{
"epoch": 1.1738900234475456,
"grad_norm": 0.0665329247713089,
"learning_rate": 0.00016898,
"loss": 0.0333,
"step": 7760
},
{
"epoch": 1.1754027683231223,
"grad_norm": 0.07484222948551178,
"learning_rate": 0.00016894000000000002,
"loss": 0.0319,
"step": 7770
},
{
"epoch": 1.176915513198699,
"grad_norm": 0.08218715339899063,
"learning_rate": 0.0001689,
"loss": 0.0308,
"step": 7780
},
{
"epoch": 1.1784282580742758,
"grad_norm": 0.06873024255037308,
"learning_rate": 0.00016886,
"loss": 0.0349,
"step": 7790
},
{
"epoch": 1.1799410029498525,
"grad_norm": 0.07846609503030777,
"learning_rate": 0.00016882,
"loss": 0.0359,
"step": 7800
},
{
"epoch": 1.1799410029498525,
"eval_cer": 0.1078840865459451,
"eval_loss": 0.03878456726670265,
"eval_runtime": 10398.1972,
"eval_samples_per_second": 2.025,
"eval_steps_per_second": 0.253,
"step": 7800
},
{
"epoch": 1.1814537478254292,
"grad_norm": 0.06112883612513542,
"learning_rate": 0.00016878,
"loss": 0.0324,
"step": 7810
},
{
"epoch": 1.182966492701006,
"grad_norm": 0.07065495103597641,
"learning_rate": 0.00016874000000000001,
"loss": 0.0333,
"step": 7820
},
{
"epoch": 1.1844792375765827,
"grad_norm": 0.10944267362356186,
"learning_rate": 0.0001687,
"loss": 0.0322,
"step": 7830
},
{
"epoch": 1.1859919824521594,
"grad_norm": 0.08741329610347748,
"learning_rate": 0.00016866000000000003,
"loss": 0.0339,
"step": 7840
},
{
"epoch": 1.1875047273277362,
"grad_norm": 0.06457091867923737,
"learning_rate": 0.00016862,
"loss": 0.0345,
"step": 7850
},
{
"epoch": 1.1890174722033129,
"grad_norm": 0.0570165179669857,
"learning_rate": 0.00016858000000000002,
"loss": 0.032,
"step": 7860
},
{
"epoch": 1.1905302170788896,
"grad_norm": 0.07944530248641968,
"learning_rate": 0.00016854,
"loss": 0.0347,
"step": 7870
},
{
"epoch": 1.1920429619544664,
"grad_norm": 0.06981216371059418,
"learning_rate": 0.0001685,
"loss": 0.0329,
"step": 7880
},
{
"epoch": 1.193555706830043,
"grad_norm": 0.052252449095249176,
"learning_rate": 0.00016846000000000002,
"loss": 0.0327,
"step": 7890
},
{
"epoch": 1.1950684517056198,
"grad_norm": 0.05333190783858299,
"learning_rate": 0.00016842,
"loss": 0.0269,
"step": 7900
},
{
"epoch": 1.1965811965811965,
"grad_norm": 0.18012838065624237,
"learning_rate": 0.00016838,
"loss": 0.0324,
"step": 7910
},
{
"epoch": 1.1980939414567733,
"grad_norm": 0.06892676651477814,
"learning_rate": 0.00016834,
"loss": 0.0294,
"step": 7920
},
{
"epoch": 1.19960668633235,
"grad_norm": 0.07558593899011612,
"learning_rate": 0.00016830000000000003,
"loss": 0.0371,
"step": 7930
},
{
"epoch": 1.2011194312079267,
"grad_norm": 0.08046507835388184,
"learning_rate": 0.00016826000000000002,
"loss": 0.0311,
"step": 7940
},
{
"epoch": 1.2026321760835035,
"grad_norm": 0.07986424118280411,
"learning_rate": 0.00016822,
"loss": 0.0357,
"step": 7950
},
{
"epoch": 1.2041449209590802,
"grad_norm": 0.07394195348024368,
"learning_rate": 0.00016818,
"loss": 0.0341,
"step": 7960
},
{
"epoch": 1.205657665834657,
"grad_norm": 0.06269822269678116,
"learning_rate": 0.00016814,
"loss": 0.0329,
"step": 7970
},
{
"epoch": 1.2071704107102337,
"grad_norm": 0.07179784774780273,
"learning_rate": 0.00016810000000000002,
"loss": 0.0329,
"step": 7980
},
{
"epoch": 1.2086831555858104,
"grad_norm": 0.10174887627363205,
"learning_rate": 0.00016806000000000001,
"loss": 0.0262,
"step": 7990
},
{
"epoch": 1.2101959004613871,
"grad_norm": 0.06536643952131271,
"learning_rate": 0.00016802,
"loss": 0.034,
"step": 8000
},
{
"epoch": 1.2101959004613871,
"eval_cer": 0.15941559003095868,
"eval_loss": 0.03837862238287926,
"eval_runtime": 10390.1541,
"eval_samples_per_second": 2.026,
"eval_steps_per_second": 0.253,
"step": 8000
},
{
"epoch": 1.2117086453369639,
"grad_norm": 0.13079065084457397,
"learning_rate": 0.00016798,
"loss": 0.037,
"step": 8010
},
{
"epoch": 1.2132213902125406,
"grad_norm": 0.07293607294559479,
"learning_rate": 0.00016794000000000002,
"loss": 0.0295,
"step": 8020
},
{
"epoch": 1.2147341350881173,
"grad_norm": 0.07390507310628891,
"learning_rate": 0.00016790000000000002,
"loss": 0.0309,
"step": 8030
},
{
"epoch": 1.216246879963694,
"grad_norm": 0.22675780951976776,
"learning_rate": 0.00016786,
"loss": 0.0341,
"step": 8040
},
{
"epoch": 1.2177596248392708,
"grad_norm": 0.06630139797925949,
"learning_rate": 0.00016782,
"loss": 0.0359,
"step": 8050
},
{
"epoch": 1.2192723697148475,
"grad_norm": 0.09231210500001907,
"learning_rate": 0.00016778,
"loss": 0.0325,
"step": 8060
},
{
"epoch": 1.2207851145904243,
"grad_norm": 0.067893847823143,
"learning_rate": 0.00016774000000000002,
"loss": 0.0338,
"step": 8070
},
{
"epoch": 1.222297859466001,
"grad_norm": 0.16284491121768951,
"learning_rate": 0.0001677,
"loss": 0.0362,
"step": 8080
},
{
"epoch": 1.2238106043415777,
"grad_norm": 0.07695828378200531,
"learning_rate": 0.00016766,
"loss": 0.0367,
"step": 8090
},
{
"epoch": 1.2253233492171545,
"grad_norm": 0.07685229927301407,
"learning_rate": 0.00016762,
"loss": 0.0383,
"step": 8100
},
{
"epoch": 1.2268360940927312,
"grad_norm": 0.08510534465312958,
"learning_rate": 0.00016758,
"loss": 0.0346,
"step": 8110
},
{
"epoch": 1.228348838968308,
"grad_norm": 0.16018977761268616,
"learning_rate": 0.00016754,
"loss": 0.0314,
"step": 8120
},
{
"epoch": 1.2298615838438847,
"grad_norm": 0.10644716769456863,
"learning_rate": 0.0001675,
"loss": 0.0427,
"step": 8130
},
{
"epoch": 1.2313743287194614,
"grad_norm": 0.06390608847141266,
"learning_rate": 0.00016746000000000003,
"loss": 0.0333,
"step": 8140
},
{
"epoch": 1.2328870735950381,
"grad_norm": 0.1173742264509201,
"learning_rate": 0.00016742,
"loss": 0.0335,
"step": 8150
},
{
"epoch": 1.2343998184706149,
"grad_norm": 0.08506636321544647,
"learning_rate": 0.00016738000000000001,
"loss": 0.0393,
"step": 8160
},
{
"epoch": 1.2359125633461916,
"grad_norm": 0.08176897466182709,
"learning_rate": 0.00016734,
"loss": 0.0306,
"step": 8170
},
{
"epoch": 1.2374253082217683,
"grad_norm": 0.11272590607404709,
"learning_rate": 0.0001673,
"loss": 0.0368,
"step": 8180
},
{
"epoch": 1.238938053097345,
"grad_norm": 0.10923430323600769,
"learning_rate": 0.00016726000000000002,
"loss": 0.0389,
"step": 8190
},
{
"epoch": 1.2404507979729218,
"grad_norm": 0.05665091425180435,
"learning_rate": 0.00016722,
"loss": 0.0352,
"step": 8200
},
{
"epoch": 1.2404507979729218,
"eval_cer": 0.195939668868118,
"eval_loss": 0.03837649151682854,
"eval_runtime": 10379.5895,
"eval_samples_per_second": 2.028,
"eval_steps_per_second": 0.254,
"step": 8200
},
{
"epoch": 1.2419635428484985,
"grad_norm": 0.08927123993635178,
"learning_rate": 0.00016718,
"loss": 0.0356,
"step": 8210
},
{
"epoch": 1.2434762877240753,
"grad_norm": 0.09398534893989563,
"learning_rate": 0.00016714,
"loss": 0.0365,
"step": 8220
},
{
"epoch": 1.244989032599652,
"grad_norm": 0.0905461311340332,
"learning_rate": 0.00016710000000000002,
"loss": 0.0335,
"step": 8230
},
{
"epoch": 1.2465017774752287,
"grad_norm": 0.09033455699682236,
"learning_rate": 0.00016706000000000002,
"loss": 0.0376,
"step": 8240
},
{
"epoch": 1.2480145223508055,
"grad_norm": 0.08217161148786545,
"learning_rate": 0.00016702,
"loss": 0.032,
"step": 8250
},
{
"epoch": 1.2495272672263822,
"grad_norm": 0.0694824755191803,
"learning_rate": 0.00016698,
"loss": 0.0354,
"step": 8260
},
{
"epoch": 1.2510400121019591,
"grad_norm": 0.08535374701023102,
"learning_rate": 0.00016694,
"loss": 0.0288,
"step": 8270
},
{
"epoch": 1.2525527569775359,
"grad_norm": 0.10267391055822372,
"learning_rate": 0.00016690000000000002,
"loss": 0.0331,
"step": 8280
},
{
"epoch": 1.2540655018531126,
"grad_norm": 0.0720328763127327,
"learning_rate": 0.00016686,
"loss": 0.0324,
"step": 8290
},
{
"epoch": 1.2555782467286893,
"grad_norm": 0.15617039799690247,
"learning_rate": 0.00016682,
"loss": 0.0374,
"step": 8300
},
{
"epoch": 1.257090991604266,
"grad_norm": 0.09863468259572983,
"learning_rate": 0.00016678,
"loss": 0.0363,
"step": 8310
},
{
"epoch": 1.2586037364798428,
"grad_norm": 0.08562877029180527,
"learning_rate": 0.00016674000000000002,
"loss": 0.0347,
"step": 8320
},
{
"epoch": 1.2601164813554195,
"grad_norm": 0.09868349879980087,
"learning_rate": 0.0001667,
"loss": 0.0362,
"step": 8330
},
{
"epoch": 1.2616292262309963,
"grad_norm": 0.09744835644960403,
"learning_rate": 0.00016666,
"loss": 0.0364,
"step": 8340
},
{
"epoch": 1.263141971106573,
"grad_norm": 0.19243358075618744,
"learning_rate": 0.00016662,
"loss": 0.0378,
"step": 8350
},
{
"epoch": 1.2646547159821497,
"grad_norm": 0.06478457897901535,
"learning_rate": 0.00016658,
"loss": 0.033,
"step": 8360
},
{
"epoch": 1.2661674608577265,
"grad_norm": 0.09313791990280151,
"learning_rate": 0.00016654000000000001,
"loss": 0.04,
"step": 8370
},
{
"epoch": 1.2676802057333032,
"grad_norm": 0.0906825065612793,
"learning_rate": 0.0001665,
"loss": 0.0341,
"step": 8380
},
{
"epoch": 1.26919295060888,
"grad_norm": 0.08549359440803528,
"learning_rate": 0.00016646000000000003,
"loss": 0.0376,
"step": 8390
},
{
"epoch": 1.2707056954844567,
"grad_norm": 0.0915452241897583,
"learning_rate": 0.00016642,
"loss": 0.029,
"step": 8400
},
{
"epoch": 1.2707056954844567,
"eval_cer": 0.19141261028875828,
"eval_loss": 0.03777679055929184,
"eval_runtime": 10360.722,
"eval_samples_per_second": 2.032,
"eval_steps_per_second": 0.254,
"step": 8400
},
{
"epoch": 1.2722184403600334,
"grad_norm": 0.07039971649646759,
"learning_rate": 0.00016638,
"loss": 0.0355,
"step": 8410
},
{
"epoch": 1.2737311852356101,
"grad_norm": 0.08890164643526077,
"learning_rate": 0.00016634,
"loss": 0.03,
"step": 8420
},
{
"epoch": 1.2752439301111869,
"grad_norm": 0.07611805945634842,
"learning_rate": 0.0001663,
"loss": 0.037,
"step": 8430
},
{
"epoch": 1.2767566749867636,
"grad_norm": 0.10268427431583405,
"learning_rate": 0.00016626000000000002,
"loss": 0.0346,
"step": 8440
},
{
"epoch": 1.2782694198623403,
"grad_norm": 0.07185817509889603,
"learning_rate": 0.00016622,
"loss": 0.0334,
"step": 8450
},
{
"epoch": 1.279782164737917,
"grad_norm": 0.09720634669065475,
"learning_rate": 0.00016618,
"loss": 0.0328,
"step": 8460
},
{
"epoch": 1.2812949096134938,
"grad_norm": 0.08373324573040009,
"learning_rate": 0.00016614,
"loss": 0.0342,
"step": 8470
},
{
"epoch": 1.2828076544890705,
"grad_norm": 0.05525701493024826,
"learning_rate": 0.0001661,
"loss": 0.0295,
"step": 8480
},
{
"epoch": 1.2843203993646473,
"grad_norm": 0.08398504555225372,
"learning_rate": 0.00016606000000000002,
"loss": 0.0336,
"step": 8490
},
{
"epoch": 1.285833144240224,
"grad_norm": 0.11384329944849014,
"learning_rate": 0.00016601999999999999,
"loss": 0.0335,
"step": 8500
},
{
"epoch": 1.2873458891158007,
"grad_norm": 0.05366117134690285,
"learning_rate": 0.00016598,
"loss": 0.0303,
"step": 8510
},
{
"epoch": 1.2888586339913775,
"grad_norm": 0.09270923584699631,
"learning_rate": 0.00016594,
"loss": 0.0309,
"step": 8520
},
{
"epoch": 1.2903713788669542,
"grad_norm": 0.09621911495923996,
"learning_rate": 0.00016590000000000002,
"loss": 0.0326,
"step": 8530
},
{
"epoch": 1.291884123742531,
"grad_norm": 0.09750113636255264,
"learning_rate": 0.00016586000000000001,
"loss": 0.032,
"step": 8540
},
{
"epoch": 1.2933968686181077,
"grad_norm": 0.08557499945163727,
"learning_rate": 0.00016582,
"loss": 0.0331,
"step": 8550
},
{
"epoch": 1.2949096134936844,
"grad_norm": 0.0842200294137001,
"learning_rate": 0.00016578,
"loss": 0.0339,
"step": 8560
},
{
"epoch": 1.2964223583692611,
"grad_norm": 0.06341574341058731,
"learning_rate": 0.00016574,
"loss": 0.0369,
"step": 8570
},
{
"epoch": 1.2979351032448379,
"grad_norm": 0.07687686383724213,
"learning_rate": 0.00016570000000000002,
"loss": 0.0291,
"step": 8580
},
{
"epoch": 1.2994478481204146,
"grad_norm": 0.07118263840675354,
"learning_rate": 0.00016566,
"loss": 0.0331,
"step": 8590
},
{
"epoch": 1.3009605929959913,
"grad_norm": 0.10967772454023361,
"learning_rate": 0.00016562,
"loss": 0.04,
"step": 8600
},
{
"epoch": 1.3009605929959913,
"eval_cer": 0.15955704130871465,
"eval_loss": 0.03786647692322731,
"eval_runtime": 10383.8112,
"eval_samples_per_second": 2.027,
"eval_steps_per_second": 0.253,
"step": 8600
},
{
"epoch": 1.302473337871568,
"grad_norm": 0.09102348983287811,
"learning_rate": 0.00016558,
"loss": 0.0337,
"step": 8610
},
{
"epoch": 1.3039860827471448,
"grad_norm": 0.0596625916659832,
"learning_rate": 0.00016554000000000002,
"loss": 0.0341,
"step": 8620
},
{
"epoch": 1.3054988276227215,
"grad_norm": 0.0790410116314888,
"learning_rate": 0.0001655,
"loss": 0.0348,
"step": 8630
},
{
"epoch": 1.3070115724982982,
"grad_norm": 0.08243832737207413,
"learning_rate": 0.00016546,
"loss": 0.0351,
"step": 8640
},
{
"epoch": 1.308524317373875,
"grad_norm": 0.07890262454748154,
"learning_rate": 0.00016542,
"loss": 0.0331,
"step": 8650
},
{
"epoch": 1.3100370622494517,
"grad_norm": 0.06424404680728912,
"learning_rate": 0.00016538,
"loss": 0.032,
"step": 8660
},
{
"epoch": 1.3115498071250284,
"grad_norm": 0.08828658610582352,
"learning_rate": 0.00016534,
"loss": 0.0351,
"step": 8670
},
{
"epoch": 1.3130625520006052,
"grad_norm": 0.07190482318401337,
"learning_rate": 0.0001653,
"loss": 0.0334,
"step": 8680
},
{
"epoch": 1.314575296876182,
"grad_norm": 0.1207108125090599,
"learning_rate": 0.00016526000000000003,
"loss": 0.0333,
"step": 8690
},
{
"epoch": 1.3160880417517586,
"grad_norm": 0.057197410613298416,
"learning_rate": 0.00016522,
"loss": 0.0273,
"step": 8700
},
{
"epoch": 1.3176007866273354,
"grad_norm": 0.0845530703663826,
"learning_rate": 0.00016518000000000001,
"loss": 0.0398,
"step": 8710
},
{
"epoch": 1.319113531502912,
"grad_norm": 0.07357069104909897,
"learning_rate": 0.00016514,
"loss": 0.0334,
"step": 8720
},
{
"epoch": 1.3206262763784888,
"grad_norm": 0.07419273257255554,
"learning_rate": 0.0001651,
"loss": 0.0267,
"step": 8730
},
{
"epoch": 1.3221390212540656,
"grad_norm": 0.08293847739696503,
"learning_rate": 0.00016506000000000002,
"loss": 0.0286,
"step": 8740
},
{
"epoch": 1.3236517661296423,
"grad_norm": 0.09437254071235657,
"learning_rate": 0.00016502,
"loss": 0.0411,
"step": 8750
},
{
"epoch": 1.325164511005219,
"grad_norm": 0.06988554447889328,
"learning_rate": 0.00016498,
"loss": 0.0288,
"step": 8760
},
{
"epoch": 1.3266772558807958,
"grad_norm": 0.11081293970346451,
"learning_rate": 0.00016494,
"loss": 0.0342,
"step": 8770
},
{
"epoch": 1.3281900007563725,
"grad_norm": 0.0911073237657547,
"learning_rate": 0.0001649,
"loss": 0.0324,
"step": 8780
},
{
"epoch": 1.3297027456319492,
"grad_norm": 0.08337673544883728,
"learning_rate": 0.00016486000000000002,
"loss": 0.0297,
"step": 8790
},
{
"epoch": 1.331215490507526,
"grad_norm": 0.09077824652194977,
"learning_rate": 0.00016482,
"loss": 0.0319,
"step": 8800
},
{
"epoch": 1.331215490507526,
"eval_cer": 0.050760007214632856,
"eval_loss": 0.03842457756400108,
"eval_runtime": 10378.6583,
"eval_samples_per_second": 2.028,
"eval_steps_per_second": 0.254,
"step": 8800
},
{
"epoch": 1.3327282353831027,
"grad_norm": 0.12336084991693497,
"learning_rate": 0.00016478,
"loss": 0.0371,
"step": 8810
},
{
"epoch": 1.3342409802586794,
"grad_norm": 0.07978357374668121,
"learning_rate": 0.00016474,
"loss": 0.0349,
"step": 8820
},
{
"epoch": 1.3357537251342562,
"grad_norm": 0.1073361411690712,
"learning_rate": 0.00016470000000000002,
"loss": 0.0417,
"step": 8830
},
{
"epoch": 1.337266470009833,
"grad_norm": 0.05822708085179329,
"learning_rate": 0.00016466,
"loss": 0.0302,
"step": 8840
},
{
"epoch": 1.3387792148854096,
"grad_norm": 0.06241593137383461,
"learning_rate": 0.00016462,
"loss": 0.0365,
"step": 8850
},
{
"epoch": 1.3402919597609864,
"grad_norm": 0.10107123106718063,
"learning_rate": 0.00016458,
"loss": 0.0345,
"step": 8860
},
{
"epoch": 1.341804704636563,
"grad_norm": 0.09659604728221893,
"learning_rate": 0.00016454,
"loss": 0.0324,
"step": 8870
},
{
"epoch": 1.3433174495121398,
"grad_norm": 0.07501540333032608,
"learning_rate": 0.00016450000000000001,
"loss": 0.0317,
"step": 8880
},
{
"epoch": 1.3448301943877166,
"grad_norm": 0.071120485663414,
"learning_rate": 0.00016446,
"loss": 0.0299,
"step": 8890
},
{
"epoch": 1.3463429392632933,
"grad_norm": 0.07235920429229736,
"learning_rate": 0.00016442000000000003,
"loss": 0.0337,
"step": 8900
},
{
"epoch": 1.34785568413887,
"grad_norm": 0.08588097244501114,
"learning_rate": 0.00016438,
"loss": 0.0302,
"step": 8910
},
{
"epoch": 1.3493684290144468,
"grad_norm": 0.052244190126657486,
"learning_rate": 0.00016434000000000002,
"loss": 0.0326,
"step": 8920
},
{
"epoch": 1.3508811738900235,
"grad_norm": 0.0702931210398674,
"learning_rate": 0.0001643,
"loss": 0.0372,
"step": 8930
},
{
"epoch": 1.3523939187656002,
"grad_norm": 0.10441485792398453,
"learning_rate": 0.00016426,
"loss": 0.037,
"step": 8940
},
{
"epoch": 1.353906663641177,
"grad_norm": 0.10514800250530243,
"learning_rate": 0.00016422000000000002,
"loss": 0.037,
"step": 8950
},
{
"epoch": 1.3554194085167537,
"grad_norm": 0.07011867314577103,
"learning_rate": 0.00016418,
"loss": 0.0314,
"step": 8960
},
{
"epoch": 1.3569321533923304,
"grad_norm": 0.06335943937301636,
"learning_rate": 0.00016414,
"loss": 0.0311,
"step": 8970
},
{
"epoch": 1.3584448982679072,
"grad_norm": 0.07194424420595169,
"learning_rate": 0.0001641,
"loss": 0.0336,
"step": 8980
},
{
"epoch": 1.3599576431434839,
"grad_norm": 0.07171431183815002,
"learning_rate": 0.00016406000000000003,
"loss": 0.0312,
"step": 8990
},
{
"epoch": 1.3614703880190606,
"grad_norm": 0.14893119037151337,
"learning_rate": 0.00016402000000000002,
"loss": 0.0348,
"step": 9000
},
{
"epoch": 1.3614703880190606,
"eval_cer": 0.23852391576669063,
"eval_loss": 0.03737874701619148,
"eval_runtime": 10378.6671,
"eval_samples_per_second": 2.028,
"eval_steps_per_second": 0.254,
"step": 9000
},
{
"epoch": 1.3629831328946374,
"grad_norm": 0.09854207932949066,
"learning_rate": 0.00016398,
"loss": 0.0334,
"step": 9010
},
{
"epoch": 1.364495877770214,
"grad_norm": 0.0829731673002243,
"learning_rate": 0.00016394,
"loss": 0.0367,
"step": 9020
},
{
"epoch": 1.3660086226457908,
"grad_norm": 0.05378841981291771,
"learning_rate": 0.0001639,
"loss": 0.0328,
"step": 9030
},
{
"epoch": 1.3675213675213675,
"grad_norm": 0.08590775728225708,
"learning_rate": 0.00016386000000000002,
"loss": 0.0337,
"step": 9040
},
{
"epoch": 1.3690341123969443,
"grad_norm": 0.06473217159509659,
"learning_rate": 0.00016382000000000001,
"loss": 0.0309,
"step": 9050
},
{
"epoch": 1.370546857272521,
"grad_norm": 0.14496292173862457,
"learning_rate": 0.00016378,
"loss": 0.0362,
"step": 9060
},
{
"epoch": 1.3720596021480977,
"grad_norm": 0.0658840760588646,
"learning_rate": 0.00016374,
"loss": 0.0316,
"step": 9070
},
{
"epoch": 1.3735723470236745,
"grad_norm": 0.0722692534327507,
"learning_rate": 0.00016370000000000002,
"loss": 0.0321,
"step": 9080
},
{
"epoch": 1.3750850918992512,
"grad_norm": 0.0751873180270195,
"learning_rate": 0.00016366000000000002,
"loss": 0.0357,
"step": 9090
},
{
"epoch": 1.376597836774828,
"grad_norm": 0.07309116423130035,
"learning_rate": 0.00016362,
"loss": 0.0329,
"step": 9100
},
{
"epoch": 1.3781105816504047,
"grad_norm": 0.09205902367830276,
"learning_rate": 0.00016358,
"loss": 0.0311,
"step": 9110
},
{
"epoch": 1.3796233265259814,
"grad_norm": 0.06787604093551636,
"learning_rate": 0.00016354,
"loss": 0.0308,
"step": 9120
},
{
"epoch": 1.3811360714015581,
"grad_norm": 0.08365906029939651,
"learning_rate": 0.00016350000000000002,
"loss": 0.0344,
"step": 9130
},
{
"epoch": 1.3826488162771349,
"grad_norm": 0.07461418211460114,
"learning_rate": 0.00016346,
"loss": 0.0286,
"step": 9140
},
{
"epoch": 1.3841615611527116,
"grad_norm": 0.11862760782241821,
"learning_rate": 0.00016342,
"loss": 0.0361,
"step": 9150
},
{
"epoch": 1.3856743060282883,
"grad_norm": 0.07170487195253372,
"learning_rate": 0.00016338,
"loss": 0.0335,
"step": 9160
},
{
"epoch": 1.387187050903865,
"grad_norm": 0.05578533932566643,
"learning_rate": 0.00016334,
"loss": 0.0311,
"step": 9170
},
{
"epoch": 1.3886997957794418,
"grad_norm": 0.08838359266519547,
"learning_rate": 0.0001633,
"loss": 0.0341,
"step": 9180
},
{
"epoch": 1.3902125406550185,
"grad_norm": 0.09284081310033798,
"learning_rate": 0.00016326,
"loss": 0.0322,
"step": 9190
},
{
"epoch": 1.3917252855305953,
"grad_norm": 0.07425800710916519,
"learning_rate": 0.00016322000000000003,
"loss": 0.0319,
"step": 9200
},
{
"epoch": 1.3917252855305953,
"eval_cer": 0.11210909414354649,
"eval_loss": 0.036687206476926804,
"eval_runtime": 10439.2076,
"eval_samples_per_second": 2.017,
"eval_steps_per_second": 0.252,
"step": 9200
},
{
"epoch": 1.393238030406172,
"grad_norm": 0.0754477009177208,
"learning_rate": 0.00016318,
"loss": 0.0355,
"step": 9210
},
{
"epoch": 1.3947507752817487,
"grad_norm": 0.06408898532390594,
"learning_rate": 0.00016314,
"loss": 0.0345,
"step": 9220
},
{
"epoch": 1.3962635201573255,
"grad_norm": 0.06003674492239952,
"learning_rate": 0.0001631,
"loss": 0.0316,
"step": 9230
},
{
"epoch": 1.3977762650329022,
"grad_norm": 0.07409165799617767,
"learning_rate": 0.00016306,
"loss": 0.03,
"step": 9240
},
{
"epoch": 1.399289009908479,
"grad_norm": 0.07411226630210876,
"learning_rate": 0.00016302000000000002,
"loss": 0.0325,
"step": 9250
},
{
"epoch": 1.4008017547840557,
"grad_norm": 0.09041300415992737,
"learning_rate": 0.00016298,
"loss": 0.034,
"step": 9260
},
{
"epoch": 1.4023144996596324,
"grad_norm": 0.0684356689453125,
"learning_rate": 0.00016294,
"loss": 0.0345,
"step": 9270
},
{
"epoch": 1.4038272445352091,
"grad_norm": 0.08621818572282791,
"learning_rate": 0.0001629,
"loss": 0.0287,
"step": 9280
},
{
"epoch": 1.4053399894107859,
"grad_norm": 0.09592179954051971,
"learning_rate": 0.00016286000000000002,
"loss": 0.0371,
"step": 9290
},
{
"epoch": 1.4068527342863626,
"grad_norm": 0.061489395797252655,
"learning_rate": 0.00016282000000000002,
"loss": 0.0297,
"step": 9300
},
{
"epoch": 1.4083654791619393,
"grad_norm": 0.08933687955141068,
"learning_rate": 0.00016278,
"loss": 0.0329,
"step": 9310
},
{
"epoch": 1.409878224037516,
"grad_norm": 0.06542832404375076,
"learning_rate": 0.00016274,
"loss": 0.0359,
"step": 9320
},
{
"epoch": 1.4113909689130928,
"grad_norm": 0.10515543818473816,
"learning_rate": 0.0001627,
"loss": 0.0282,
"step": 9330
},
{
"epoch": 1.4129037137886695,
"grad_norm": 0.11535684019327164,
"learning_rate": 0.00016266000000000002,
"loss": 0.0346,
"step": 9340
},
{
"epoch": 1.4144164586642463,
"grad_norm": 0.10359009355306625,
"learning_rate": 0.00016262,
"loss": 0.0326,
"step": 9350
},
{
"epoch": 1.415929203539823,
"grad_norm": 0.08905740082263947,
"learning_rate": 0.00016258,
"loss": 0.0353,
"step": 9360
},
{
"epoch": 1.4174419484153997,
"grad_norm": 0.0570446141064167,
"learning_rate": 0.00016254,
"loss": 0.0282,
"step": 9370
},
{
"epoch": 1.4189546932909765,
"grad_norm": 0.0748140960931778,
"learning_rate": 0.00016250000000000002,
"loss": 0.0304,
"step": 9380
},
{
"epoch": 1.4204674381665532,
"grad_norm": 0.07355400919914246,
"learning_rate": 0.00016246,
"loss": 0.031,
"step": 9390
},
{
"epoch": 1.42198018304213,
"grad_norm": 0.09431416541337967,
"learning_rate": 0.00016242,
"loss": 0.0355,
"step": 9400
},
{
"epoch": 1.42198018304213,
"eval_cer": 0.09460805024547048,
"eval_loss": 0.03653513640165329,
"eval_runtime": 10519.6629,
"eval_samples_per_second": 2.001,
"eval_steps_per_second": 0.25,
"step": 9400
},
{
"epoch": 1.4234929279177067,
"grad_norm": 0.10641132295131683,
"learning_rate": 0.00016238,
"loss": 0.0299,
"step": 9410
},
{
"epoch": 1.4250056727932834,
"grad_norm": 0.051270656287670135,
"learning_rate": 0.00016234,
"loss": 0.0317,
"step": 9420
},
{
"epoch": 1.4265184176688601,
"grad_norm": 0.07362283766269684,
"learning_rate": 0.00016230000000000001,
"loss": 0.0269,
"step": 9430
},
{
"epoch": 1.4280311625444368,
"grad_norm": 0.060159552842378616,
"learning_rate": 0.00016226,
"loss": 0.0335,
"step": 9440
},
{
"epoch": 1.4295439074200136,
"grad_norm": 0.08667318522930145,
"learning_rate": 0.00016222000000000003,
"loss": 0.0361,
"step": 9450
},
{
"epoch": 1.4310566522955903,
"grad_norm": 0.06154588237404823,
"learning_rate": 0.00016218,
"loss": 0.0334,
"step": 9460
},
{
"epoch": 1.432569397171167,
"grad_norm": 0.10563425719738007,
"learning_rate": 0.00016214000000000002,
"loss": 0.0362,
"step": 9470
},
{
"epoch": 1.4340821420467438,
"grad_norm": 0.10325556248426437,
"learning_rate": 0.0001621,
"loss": 0.0343,
"step": 9480
},
{
"epoch": 1.4355948869223205,
"grad_norm": 0.08902329206466675,
"learning_rate": 0.00016206,
"loss": 0.032,
"step": 9490
},
{
"epoch": 1.4371076317978972,
"grad_norm": 0.07280543446540833,
"learning_rate": 0.00016202000000000002,
"loss": 0.0366,
"step": 9500
},
{
"epoch": 1.438620376673474,
"grad_norm": 0.09071139991283417,
"learning_rate": 0.00016198,
"loss": 0.0299,
"step": 9510
},
{
"epoch": 1.4401331215490507,
"grad_norm": 0.06658421456813812,
"learning_rate": 0.00016194,
"loss": 0.0281,
"step": 9520
},
{
"epoch": 1.4416458664246274,
"grad_norm": 0.0793207511305809,
"learning_rate": 0.0001619,
"loss": 0.0292,
"step": 9530
},
{
"epoch": 1.4431586113002042,
"grad_norm": 0.0829392522573471,
"learning_rate": 0.00016186,
"loss": 0.0337,
"step": 9540
},
{
"epoch": 1.444671356175781,
"grad_norm": 0.061817191541194916,
"learning_rate": 0.00016182000000000002,
"loss": 0.0298,
"step": 9550
},
{
"epoch": 1.4461841010513576,
"grad_norm": 0.09837779402732849,
"learning_rate": 0.00016177999999999999,
"loss": 0.037,
"step": 9560
},
{
"epoch": 1.4476968459269344,
"grad_norm": 0.05777046084403992,
"learning_rate": 0.00016174,
"loss": 0.0339,
"step": 9570
},
{
"epoch": 1.449209590802511,
"grad_norm": 0.07731931656599045,
"learning_rate": 0.0001617,
"loss": 0.0338,
"step": 9580
},
{
"epoch": 1.4507223356780878,
"grad_norm": 0.08898504078388214,
"learning_rate": 0.00016166000000000002,
"loss": 0.0358,
"step": 9590
},
{
"epoch": 1.4522350805536646,
"grad_norm": 0.0696534812450409,
"learning_rate": 0.00016162000000000001,
"loss": 0.0318,
"step": 9600
},
{
"epoch": 1.4522350805536646,
"eval_cer": 0.08453906649568975,
"eval_loss": 0.036363635212183,
"eval_runtime": 10514.0599,
"eval_samples_per_second": 2.002,
"eval_steps_per_second": 0.25,
"step": 9600
},
{
"epoch": 1.4537478254292413,
"grad_norm": 0.059242941439151764,
"learning_rate": 0.00016158,
"loss": 0.0313,
"step": 9610
},
{
"epoch": 1.455260570304818,
"grad_norm": 0.0844852551817894,
"learning_rate": 0.00016154,
"loss": 0.034,
"step": 9620
},
{
"epoch": 1.4567733151803948,
"grad_norm": 0.08737514168024063,
"learning_rate": 0.0001615,
"loss": 0.0314,
"step": 9630
},
{
"epoch": 1.4582860600559715,
"grad_norm": 0.08028477430343628,
"learning_rate": 0.00016146000000000002,
"loss": 0.028,
"step": 9640
},
{
"epoch": 1.4597988049315482,
"grad_norm": 0.08293917775154114,
"learning_rate": 0.00016142,
"loss": 0.0344,
"step": 9650
},
{
"epoch": 1.461311549807125,
"grad_norm": 0.07055462896823883,
"learning_rate": 0.00016138,
"loss": 0.0329,
"step": 9660
},
{
"epoch": 1.4628242946827017,
"grad_norm": 0.08431320637464523,
"learning_rate": 0.00016134,
"loss": 0.0313,
"step": 9670
},
{
"epoch": 1.4643370395582784,
"grad_norm": 0.09756868332624435,
"learning_rate": 0.00016130000000000002,
"loss": 0.0305,
"step": 9680
},
{
"epoch": 1.4658497844338552,
"grad_norm": 0.07265082001686096,
"learning_rate": 0.00016126,
"loss": 0.0333,
"step": 9690
},
{
"epoch": 1.467362529309432,
"grad_norm": 0.09156455099582672,
"learning_rate": 0.00016122,
"loss": 0.0356,
"step": 9700
},
{
"epoch": 1.4688752741850086,
"grad_norm": 0.06957582384347916,
"learning_rate": 0.00016118,
"loss": 0.0313,
"step": 9710
},
{
"epoch": 1.4703880190605854,
"grad_norm": 0.06783420592546463,
"learning_rate": 0.00016114,
"loss": 0.0297,
"step": 9720
},
{
"epoch": 1.471900763936162,
"grad_norm": 0.07193417102098465,
"learning_rate": 0.0001611,
"loss": 0.0302,
"step": 9730
},
{
"epoch": 1.4734135088117388,
"grad_norm": 0.08238872140645981,
"learning_rate": 0.00016106,
"loss": 0.0335,
"step": 9740
},
{
"epoch": 1.4749262536873156,
"grad_norm": 0.07197025418281555,
"learning_rate": 0.00016102000000000003,
"loss": 0.0369,
"step": 9750
},
{
"epoch": 1.4764389985628923,
"grad_norm": 0.08109525591135025,
"learning_rate": 0.00016098,
"loss": 0.0327,
"step": 9760
},
{
"epoch": 1.477951743438469,
"grad_norm": 0.12331151217222214,
"learning_rate": 0.00016094000000000001,
"loss": 0.0372,
"step": 9770
},
{
"epoch": 1.4794644883140458,
"grad_norm": 0.08190298080444336,
"learning_rate": 0.0001609,
"loss": 0.0293,
"step": 9780
},
{
"epoch": 1.4809772331896225,
"grad_norm": 0.05840008333325386,
"learning_rate": 0.00016086,
"loss": 0.0349,
"step": 9790
},
{
"epoch": 1.4824899780651992,
"grad_norm": 0.07874023169279099,
"learning_rate": 0.00016082000000000002,
"loss": 0.0322,
"step": 9800
},
{
"epoch": 1.4824899780651992,
"eval_cer": 0.24973192203254985,
"eval_loss": 0.036100711673498154,
"eval_runtime": 10381.657,
"eval_samples_per_second": 2.028,
"eval_steps_per_second": 0.254,
"step": 9800
},
{
"epoch": 1.484002722940776,
"grad_norm": 0.0776941329240799,
"learning_rate": 0.00016078,
"loss": 0.0358,
"step": 9810
},
{
"epoch": 1.4855154678163527,
"grad_norm": 0.12248267233371735,
"learning_rate": 0.00016074,
"loss": 0.0356,
"step": 9820
},
{
"epoch": 1.4870282126919294,
"grad_norm": 0.08847146481275558,
"learning_rate": 0.0001607,
"loss": 0.0274,
"step": 9830
},
{
"epoch": 1.4885409575675062,
"grad_norm": 0.0689850002527237,
"learning_rate": 0.00016066000000000002,
"loss": 0.0266,
"step": 9840
},
{
"epoch": 1.4900537024430829,
"grad_norm": 0.06342552602291107,
"learning_rate": 0.00016062000000000002,
"loss": 0.031,
"step": 9850
},
{
"epoch": 1.4915664473186596,
"grad_norm": 0.11846140772104263,
"learning_rate": 0.00016057999999999998,
"loss": 0.0348,
"step": 9860
},
{
"epoch": 1.4930791921942363,
"grad_norm": 0.07698410004377365,
"learning_rate": 0.00016054,
"loss": 0.0259,
"step": 9870
},
{
"epoch": 1.494591937069813,
"grad_norm": 0.11177106946706772,
"learning_rate": 0.0001605,
"loss": 0.0301,
"step": 9880
},
{
"epoch": 1.4961046819453898,
"grad_norm": 0.09459209442138672,
"learning_rate": 0.00016046000000000002,
"loss": 0.0349,
"step": 9890
},
{
"epoch": 1.4976174268209665,
"grad_norm": 0.08800119906663895,
"learning_rate": 0.00016042,
"loss": 0.0335,
"step": 9900
},
{
"epoch": 1.4991301716965433,
"grad_norm": 0.09330447763204575,
"learning_rate": 0.00016038,
"loss": 0.0326,
"step": 9910
},
{
"epoch": 1.50064291657212,
"grad_norm": 0.10210063308477402,
"learning_rate": 0.00016034,
"loss": 0.035,
"step": 9920
},
{
"epoch": 1.5021556614476967,
"grad_norm": 0.11886809766292572,
"learning_rate": 0.0001603,
"loss": 0.036,
"step": 9930
},
{
"epoch": 1.5036684063232735,
"grad_norm": 0.07646410167217255,
"learning_rate": 0.00016026000000000001,
"loss": 0.0269,
"step": 9940
},
{
"epoch": 1.5051811511988502,
"grad_norm": 0.09994587302207947,
"learning_rate": 0.00016022,
"loss": 0.0298,
"step": 9950
},
{
"epoch": 1.506693896074427,
"grad_norm": 0.0781632736325264,
"learning_rate": 0.00016018,
"loss": 0.0299,
"step": 9960
},
{
"epoch": 1.5082066409500037,
"grad_norm": 0.09286709874868393,
"learning_rate": 0.00016014,
"loss": 0.0334,
"step": 9970
},
{
"epoch": 1.5097193858255804,
"grad_norm": 0.08658807724714279,
"learning_rate": 0.00016010000000000002,
"loss": 0.032,
"step": 9980
},
{
"epoch": 1.5112321307011571,
"grad_norm": 0.09535326808691025,
"learning_rate": 0.00016006,
"loss": 0.032,
"step": 9990
},
{
"epoch": 1.5127448755767339,
"grad_norm": 0.056372299790382385,
"learning_rate": 0.00016002,
"loss": 0.033,
"step": 10000
},
{
"epoch": 1.5127448755767339,
"eval_cer": 0.1808933296766016,
"eval_loss": 0.03580623120069504,
"eval_runtime": 10388.4948,
"eval_samples_per_second": 2.026,
"eval_steps_per_second": 0.253,
"step": 10000
}
],
"logging_steps": 10,
"max_steps": 50000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.622822387689695e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}