{ "best_metric": 43.10069742838263, "best_model_checkpoint": "./whisper-distil-v3/checkpoint-8000", "epoch": 0.43838018521562827, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010959504630390707, "grad_norm": 5.807405471801758, "learning_rate": 3.6e-06, "loss": 8.6037, "step": 20 }, { "epoch": 0.0021919009260781414, "grad_norm": 5.726860523223877, "learning_rate": 7.6e-06, "loss": 8.4874, "step": 40 }, { "epoch": 0.003287851389117212, "grad_norm": 7.615314960479736, "learning_rate": 1.16e-05, "loss": 8.0934, "step": 60 }, { "epoch": 0.004383801852156283, "grad_norm": 7.089465618133545, "learning_rate": 1.56e-05, "loss": 7.4227, "step": 80 }, { "epoch": 0.005479752315195353, "grad_norm": 5.158086776733398, "learning_rate": 1.9600000000000002e-05, "loss": 6.3418, "step": 100 }, { "epoch": 0.006575702778234424, "grad_norm": 3.288583278656006, "learning_rate": 2.3400000000000003e-05, "loss": 5.2747, "step": 120 }, { "epoch": 0.007671653241273495, "grad_norm": 3.1715681552886963, "learning_rate": 2.7400000000000002e-05, "loss": 4.3075, "step": 140 }, { "epoch": 0.008767603704312565, "grad_norm": 3.033198833465576, "learning_rate": 3.1400000000000004e-05, "loss": 3.631, "step": 160 }, { "epoch": 0.009863554167351636, "grad_norm": 3.027251720428467, "learning_rate": 3.54e-05, "loss": 3.2186, "step": 180 }, { "epoch": 0.010959504630390707, "grad_norm": 2.9063901901245117, "learning_rate": 3.94e-05, "loss": 2.9226, "step": 200 }, { "epoch": 0.012055455093429777, "grad_norm": 3.1165690422058105, "learning_rate": 4.3400000000000005e-05, "loss": 2.8402, "step": 220 }, { "epoch": 0.013151405556468848, "grad_norm": 2.7977383136749268, "learning_rate": 4.74e-05, "loss": 2.613, "step": 240 }, { "epoch": 0.014247356019507919, "grad_norm": 3.7818286418914795, "learning_rate": 5.14e-05, "loss": 2.377, "step": 260 }, { "epoch": 0.01534330648254699, "grad_norm": 3.2088804244995117, "learning_rate": 5.5400000000000005e-05, "loss": 2.3204, "step": 280 }, { "epoch": 0.01643925694558606, "grad_norm": 3.2518157958984375, "learning_rate": 5.94e-05, "loss": 2.1812, "step": 300 }, { "epoch": 0.01753520740862513, "grad_norm": 3.725226640701294, "learning_rate": 6.340000000000001e-05, "loss": 2.158, "step": 320 }, { "epoch": 0.0186311578716642, "grad_norm": 3.5929486751556396, "learning_rate": 6.740000000000001e-05, "loss": 2.1241, "step": 340 }, { "epoch": 0.019727108334703272, "grad_norm": 4.1317572593688965, "learning_rate": 7.14e-05, "loss": 2.2284, "step": 360 }, { "epoch": 0.020823058797742343, "grad_norm": 3.4276161193847656, "learning_rate": 7.54e-05, "loss": 1.9655, "step": 380 }, { "epoch": 0.021919009260781414, "grad_norm": 3.9775540828704834, "learning_rate": 7.94e-05, "loss": 1.9407, "step": 400 }, { "epoch": 0.023014959723820484, "grad_norm": 3.67511248588562, "learning_rate": 8.34e-05, "loss": 1.9312, "step": 420 }, { "epoch": 0.024110910186859555, "grad_norm": 4.781565189361572, "learning_rate": 8.740000000000001e-05, "loss": 1.9218, "step": 440 }, { "epoch": 0.025206860649898626, "grad_norm": 5.2797698974609375, "learning_rate": 9.140000000000001e-05, "loss": 1.8729, "step": 460 }, { "epoch": 0.026302811112937696, "grad_norm": 6.1737284660339355, "learning_rate": 9.54e-05, "loss": 1.6848, "step": 480 }, { "epoch": 0.027398761575976767, "grad_norm": 4.926702976226807, "learning_rate": 9.94e-05, "loss": 1.8866, "step": 500 }, { "epoch": 0.028494712039015838, "grad_norm": 4.043098449707031, "learning_rate": 0.0001, "loss": 1.749, "step": 520 }, { "epoch": 0.02959066250205491, "grad_norm": 4.022521495819092, "learning_rate": 0.0001, "loss": 1.7654, "step": 540 }, { "epoch": 0.03068661296509398, "grad_norm": 3.1964547634124756, "learning_rate": 0.0001, "loss": 1.7496, "step": 560 }, { "epoch": 0.03178256342813305, "grad_norm": 3.5182583332061768, "learning_rate": 0.0001, "loss": 1.7312, "step": 580 }, { "epoch": 0.03287851389117212, "grad_norm": 3.529665231704712, "learning_rate": 0.0001, "loss": 1.6307, "step": 600 }, { "epoch": 0.03397446435421119, "grad_norm": 3.329401969909668, "learning_rate": 0.0001, "loss": 1.7613, "step": 620 }, { "epoch": 0.03507041481725026, "grad_norm": 3.4481399059295654, "learning_rate": 0.0001, "loss": 1.6204, "step": 640 }, { "epoch": 0.03616636528028933, "grad_norm": 3.3551902770996094, "learning_rate": 0.0001, "loss": 1.5846, "step": 660 }, { "epoch": 0.0372623157433284, "grad_norm": 3.591031074523926, "learning_rate": 0.0001, "loss": 1.6077, "step": 680 }, { "epoch": 0.038358266206367474, "grad_norm": 3.8630764484405518, "learning_rate": 0.0001, "loss": 1.5275, "step": 700 }, { "epoch": 0.039454216669406544, "grad_norm": 3.77461838722229, "learning_rate": 0.0001, "loss": 1.5386, "step": 720 }, { "epoch": 0.040550167132445615, "grad_norm": 2.9158153533935547, "learning_rate": 0.0001, "loss": 1.5536, "step": 740 }, { "epoch": 0.041646117595484686, "grad_norm": 3.761077642440796, "learning_rate": 0.0001, "loss": 1.5607, "step": 760 }, { "epoch": 0.042742068058523756, "grad_norm": 3.5758230686187744, "learning_rate": 0.0001, "loss": 1.5145, "step": 780 }, { "epoch": 0.04383801852156283, "grad_norm": 3.01175856590271, "learning_rate": 0.0001, "loss": 1.5639, "step": 800 }, { "epoch": 0.0449339689846019, "grad_norm": 3.8395230770111084, "learning_rate": 0.0001, "loss": 1.6478, "step": 820 }, { "epoch": 0.04602991944764097, "grad_norm": 2.9240541458129883, "learning_rate": 0.0001, "loss": 1.5303, "step": 840 }, { "epoch": 0.04712586991068004, "grad_norm": 3.603835344314575, "learning_rate": 0.0001, "loss": 1.4436, "step": 860 }, { "epoch": 0.04822182037371911, "grad_norm": 3.1701183319091797, "learning_rate": 0.0001, "loss": 1.5622, "step": 880 }, { "epoch": 0.04931777083675818, "grad_norm": 4.054835796356201, "learning_rate": 0.0001, "loss": 1.4354, "step": 900 }, { "epoch": 0.05041372129979725, "grad_norm": 2.9651615619659424, "learning_rate": 0.0001, "loss": 1.4676, "step": 920 }, { "epoch": 0.05150967176283632, "grad_norm": 3.2480218410491943, "learning_rate": 0.0001, "loss": 1.3769, "step": 940 }, { "epoch": 0.05260562222587539, "grad_norm": 3.494356155395508, "learning_rate": 0.0001, "loss": 1.4928, "step": 960 }, { "epoch": 0.05370157268891446, "grad_norm": 3.394205331802368, "learning_rate": 0.0001, "loss": 1.4045, "step": 980 }, { "epoch": 0.054797523151953534, "grad_norm": 3.333587646484375, "learning_rate": 0.0001, "loss": 1.4953, "step": 1000 }, { "epoch": 0.054797523151953534, "eval_loss": 1.4313914775848389, "eval_runtime": 30911.9498, "eval_samples_per_second": 2.099, "eval_steps_per_second": 0.066, "eval_wer": 70.88587442180551, "step": 1000 }, { "epoch": 0.055893473614992605, "grad_norm": 3.2317609786987305, "learning_rate": 0.0001, "loss": 1.4376, "step": 1020 }, { "epoch": 0.056989424078031675, "grad_norm": 2.9077706336975098, "learning_rate": 0.0001, "loss": 1.4398, "step": 1040 }, { "epoch": 0.058085374541070746, "grad_norm": 3.0054707527160645, "learning_rate": 0.0001, "loss": 1.4326, "step": 1060 }, { "epoch": 0.05918132500410982, "grad_norm": 3.7243480682373047, "learning_rate": 0.0001, "loss": 1.4915, "step": 1080 }, { "epoch": 0.06027727546714889, "grad_norm": 2.9608402252197266, "learning_rate": 0.0001, "loss": 1.4595, "step": 1100 }, { "epoch": 0.06137322593018796, "grad_norm": 2.652829885482788, "learning_rate": 0.0001, "loss": 1.4378, "step": 1120 }, { "epoch": 0.06246917639322703, "grad_norm": 2.9017295837402344, "learning_rate": 0.0001, "loss": 1.4257, "step": 1140 }, { "epoch": 0.0635651268562661, "grad_norm": 3.0610859394073486, "learning_rate": 0.0001, "loss": 1.348, "step": 1160 }, { "epoch": 0.06466107731930516, "grad_norm": 2.701765775680542, "learning_rate": 0.0001, "loss": 1.3853, "step": 1180 }, { "epoch": 0.06575702778234424, "grad_norm": 3.83376145362854, "learning_rate": 0.0001, "loss": 1.4708, "step": 1200 }, { "epoch": 0.0668529782453833, "grad_norm": 3.178449869155884, "learning_rate": 0.0001, "loss": 1.362, "step": 1220 }, { "epoch": 0.06794892870842238, "grad_norm": 3.796205997467041, "learning_rate": 0.0001, "loss": 1.4331, "step": 1240 }, { "epoch": 0.06904487917146145, "grad_norm": 2.8163928985595703, "learning_rate": 0.0001, "loss": 1.2835, "step": 1260 }, { "epoch": 0.07014082963450052, "grad_norm": 2.698793888092041, "learning_rate": 0.0001, "loss": 1.3444, "step": 1280 }, { "epoch": 0.07123678009753959, "grad_norm": 2.584484815597534, "learning_rate": 0.0001, "loss": 1.2145, "step": 1300 }, { "epoch": 0.07233273056057866, "grad_norm": 2.696967363357544, "learning_rate": 0.0001, "loss": 1.2855, "step": 1320 }, { "epoch": 0.07342868102361773, "grad_norm": 3.382924795150757, "learning_rate": 0.0001, "loss": 1.2164, "step": 1340 }, { "epoch": 0.0745246314866568, "grad_norm": 2.8127260208129883, "learning_rate": 0.0001, "loss": 1.2873, "step": 1360 }, { "epoch": 0.07562058194969587, "grad_norm": 2.631011724472046, "learning_rate": 0.0001, "loss": 1.3759, "step": 1380 }, { "epoch": 0.07671653241273495, "grad_norm": 2.913276433944702, "learning_rate": 0.0001, "loss": 1.2688, "step": 1400 }, { "epoch": 0.07781248287577401, "grad_norm": 2.811455488204956, "learning_rate": 0.0001, "loss": 1.2179, "step": 1420 }, { "epoch": 0.07890843333881309, "grad_norm": 2.8242247104644775, "learning_rate": 0.0001, "loss": 1.142, "step": 1440 }, { "epoch": 0.08000438380185215, "grad_norm": 3.1733341217041016, "learning_rate": 0.0001, "loss": 1.2934, "step": 1460 }, { "epoch": 0.08110033426489123, "grad_norm": 2.491945743560791, "learning_rate": 0.0001, "loss": 1.3274, "step": 1480 }, { "epoch": 0.0821962847279303, "grad_norm": 2.717165470123291, "learning_rate": 0.0001, "loss": 1.2484, "step": 1500 }, { "epoch": 0.08329223519096937, "grad_norm": 2.3187918663024902, "learning_rate": 0.0001, "loss": 1.2038, "step": 1520 }, { "epoch": 0.08438818565400844, "grad_norm": 2.9296529293060303, "learning_rate": 0.0001, "loss": 1.1962, "step": 1540 }, { "epoch": 0.08548413611704751, "grad_norm": 2.5763330459594727, "learning_rate": 0.0001, "loss": 1.2122, "step": 1560 }, { "epoch": 0.08658008658008658, "grad_norm": 3.4159390926361084, "learning_rate": 0.0001, "loss": 1.2302, "step": 1580 }, { "epoch": 0.08767603704312565, "grad_norm": 2.893261432647705, "learning_rate": 0.0001, "loss": 1.2106, "step": 1600 }, { "epoch": 0.08877198750616472, "grad_norm": 2.1891727447509766, "learning_rate": 0.0001, "loss": 1.2282, "step": 1620 }, { "epoch": 0.0898679379692038, "grad_norm": 2.4100029468536377, "learning_rate": 0.0001, "loss": 1.2039, "step": 1640 }, { "epoch": 0.09096388843224286, "grad_norm": 2.5420494079589844, "learning_rate": 0.0001, "loss": 1.2201, "step": 1660 }, { "epoch": 0.09205983889528194, "grad_norm": 3.1885313987731934, "learning_rate": 0.0001, "loss": 1.2446, "step": 1680 }, { "epoch": 0.093155789358321, "grad_norm": 3.120586633682251, "learning_rate": 0.0001, "loss": 1.2308, "step": 1700 }, { "epoch": 0.09425173982136008, "grad_norm": 2.4548628330230713, "learning_rate": 0.0001, "loss": 1.1777, "step": 1720 }, { "epoch": 0.09534769028439914, "grad_norm": 3.101803779602051, "learning_rate": 0.0001, "loss": 1.2123, "step": 1740 }, { "epoch": 0.09644364074743822, "grad_norm": 2.536121368408203, "learning_rate": 0.0001, "loss": 1.1914, "step": 1760 }, { "epoch": 0.09753959121047728, "grad_norm": 2.3796801567077637, "learning_rate": 0.0001, "loss": 1.1848, "step": 1780 }, { "epoch": 0.09863554167351636, "grad_norm": 2.67964243888855, "learning_rate": 0.0001, "loss": 1.1973, "step": 1800 }, { "epoch": 0.09973149213655542, "grad_norm": 3.160212755203247, "learning_rate": 0.0001, "loss": 1.2472, "step": 1820 }, { "epoch": 0.1008274425995945, "grad_norm": 2.7035927772521973, "learning_rate": 0.0001, "loss": 1.1844, "step": 1840 }, { "epoch": 0.10192339306263357, "grad_norm": 2.7725090980529785, "learning_rate": 0.0001, "loss": 1.1262, "step": 1860 }, { "epoch": 0.10301934352567264, "grad_norm": 2.2705016136169434, "learning_rate": 0.0001, "loss": 1.182, "step": 1880 }, { "epoch": 0.10411529398871171, "grad_norm": 3.0717403888702393, "learning_rate": 0.0001, "loss": 1.137, "step": 1900 }, { "epoch": 0.10521124445175078, "grad_norm": 2.9270904064178467, "learning_rate": 0.0001, "loss": 1.2556, "step": 1920 }, { "epoch": 0.10630719491478985, "grad_norm": 2.4564895629882812, "learning_rate": 0.0001, "loss": 1.1812, "step": 1940 }, { "epoch": 0.10740314537782893, "grad_norm": 2.983851909637451, "learning_rate": 0.0001, "loss": 1.1445, "step": 1960 }, { "epoch": 0.10849909584086799, "grad_norm": 2.772733688354492, "learning_rate": 0.0001, "loss": 1.1968, "step": 1980 }, { "epoch": 0.10959504630390707, "grad_norm": 2.9768126010894775, "learning_rate": 0.0001, "loss": 1.0942, "step": 2000 }, { "epoch": 0.10959504630390707, "eval_loss": 1.1446514129638672, "eval_runtime": 30634.8587, "eval_samples_per_second": 2.118, "eval_steps_per_second": 0.066, "eval_wer": 61.28519240053001, "step": 2000 }, { "epoch": 0.11069099676694613, "grad_norm": 2.806312322616577, "learning_rate": 0.0001, "loss": 1.1924, "step": 2020 }, { "epoch": 0.11178694722998521, "grad_norm": 2.639443874359131, "learning_rate": 0.0001, "loss": 1.0572, "step": 2040 }, { "epoch": 0.11288289769302427, "grad_norm": 2.2005367279052734, "learning_rate": 0.0001, "loss": 1.1337, "step": 2060 }, { "epoch": 0.11397884815606335, "grad_norm": 2.4102020263671875, "learning_rate": 0.0001, "loss": 1.1297, "step": 2080 }, { "epoch": 0.11507479861910241, "grad_norm": 3.410691976547241, "learning_rate": 0.0001, "loss": 1.1354, "step": 2100 }, { "epoch": 0.11617074908214149, "grad_norm": 2.1337172985076904, "learning_rate": 0.0001, "loss": 1.1725, "step": 2120 }, { "epoch": 0.11726669954518056, "grad_norm": 2.627319097518921, "learning_rate": 0.0001, "loss": 1.1006, "step": 2140 }, { "epoch": 0.11836265000821963, "grad_norm": 2.6450726985931396, "learning_rate": 0.0001, "loss": 1.0985, "step": 2160 }, { "epoch": 0.1194586004712587, "grad_norm": 2.3205084800720215, "learning_rate": 0.0001, "loss": 1.1634, "step": 2180 }, { "epoch": 0.12055455093429777, "grad_norm": 2.51177978515625, "learning_rate": 0.0001, "loss": 1.1697, "step": 2200 }, { "epoch": 0.12165050139733684, "grad_norm": 2.6632323265075684, "learning_rate": 0.0001, "loss": 1.071, "step": 2220 }, { "epoch": 0.12274645186037592, "grad_norm": 2.8322274684906006, "learning_rate": 0.0001, "loss": 1.0983, "step": 2240 }, { "epoch": 0.12384240232341498, "grad_norm": 2.547708749771118, "learning_rate": 0.0001, "loss": 1.0629, "step": 2260 }, { "epoch": 0.12493835278645406, "grad_norm": 2.6638150215148926, "learning_rate": 0.0001, "loss": 1.1985, "step": 2280 }, { "epoch": 0.12603430324949313, "grad_norm": 2.980463743209839, "learning_rate": 0.0001, "loss": 1.1885, "step": 2300 }, { "epoch": 0.1271302537125322, "grad_norm": 1.9924368858337402, "learning_rate": 0.0001, "loss": 1.0971, "step": 2320 }, { "epoch": 0.12822620417557126, "grad_norm": 2.2847180366516113, "learning_rate": 0.0001, "loss": 1.1149, "step": 2340 }, { "epoch": 0.12932215463861033, "grad_norm": 2.4860479831695557, "learning_rate": 0.0001, "loss": 1.0927, "step": 2360 }, { "epoch": 0.13041810510164942, "grad_norm": 2.3988494873046875, "learning_rate": 0.0001, "loss": 1.1918, "step": 2380 }, { "epoch": 0.13151405556468848, "grad_norm": 2.5361902713775635, "learning_rate": 0.0001, "loss": 1.0603, "step": 2400 }, { "epoch": 0.13261000602772754, "grad_norm": 2.4060215950012207, "learning_rate": 0.0001, "loss": 1.056, "step": 2420 }, { "epoch": 0.1337059564907666, "grad_norm": 2.4094231128692627, "learning_rate": 0.0001, "loss": 1.0787, "step": 2440 }, { "epoch": 0.1348019069538057, "grad_norm": 2.5207912921905518, "learning_rate": 0.0001, "loss": 1.0901, "step": 2460 }, { "epoch": 0.13589785741684476, "grad_norm": 2.1340293884277344, "learning_rate": 0.0001, "loss": 1.1691, "step": 2480 }, { "epoch": 0.13699380787988383, "grad_norm": 2.312554359436035, "learning_rate": 0.0001, "loss": 0.9791, "step": 2500 }, { "epoch": 0.1380897583429229, "grad_norm": 2.2881298065185547, "learning_rate": 0.0001, "loss": 0.9998, "step": 2520 }, { "epoch": 0.13918570880596198, "grad_norm": 2.2146573066711426, "learning_rate": 0.0001, "loss": 1.094, "step": 2540 }, { "epoch": 0.14028165926900105, "grad_norm": 2.3992650508880615, "learning_rate": 0.0001, "loss": 1.0667, "step": 2560 }, { "epoch": 0.1413776097320401, "grad_norm": 2.7630209922790527, "learning_rate": 0.0001, "loss": 1.1541, "step": 2580 }, { "epoch": 0.14247356019507917, "grad_norm": 2.9216675758361816, "learning_rate": 0.0001, "loss": 1.0463, "step": 2600 }, { "epoch": 0.14356951065811827, "grad_norm": 2.366373062133789, "learning_rate": 0.0001, "loss": 1.0557, "step": 2620 }, { "epoch": 0.14466546112115733, "grad_norm": 2.7161865234375, "learning_rate": 0.0001, "loss": 1.1066, "step": 2640 }, { "epoch": 0.1457614115841964, "grad_norm": 2.046992778778076, "learning_rate": 0.0001, "loss": 0.9786, "step": 2660 }, { "epoch": 0.14685736204723546, "grad_norm": 2.6320793628692627, "learning_rate": 0.0001, "loss": 0.9564, "step": 2680 }, { "epoch": 0.14795331251027455, "grad_norm": 2.485445737838745, "learning_rate": 0.0001, "loss": 1.0283, "step": 2700 }, { "epoch": 0.1490492629733136, "grad_norm": 2.267420768737793, "learning_rate": 0.0001, "loss": 1.0092, "step": 2720 }, { "epoch": 0.15014521343635268, "grad_norm": 2.618067502975464, "learning_rate": 0.0001, "loss": 1.0369, "step": 2740 }, { "epoch": 0.15124116389939174, "grad_norm": 2.502471685409546, "learning_rate": 0.0001, "loss": 0.9982, "step": 2760 }, { "epoch": 0.15233711436243083, "grad_norm": 2.936964273452759, "learning_rate": 0.0001, "loss": 1.1122, "step": 2780 }, { "epoch": 0.1534330648254699, "grad_norm": 2.5342159271240234, "learning_rate": 0.0001, "loss": 1.0409, "step": 2800 }, { "epoch": 0.15452901528850896, "grad_norm": 2.88598895072937, "learning_rate": 0.0001, "loss": 1.0259, "step": 2820 }, { "epoch": 0.15562496575154802, "grad_norm": 2.6327946186065674, "learning_rate": 0.0001, "loss": 0.9829, "step": 2840 }, { "epoch": 0.1567209162145871, "grad_norm": 2.4873671531677246, "learning_rate": 0.0001, "loss": 1.0472, "step": 2860 }, { "epoch": 0.15781686667762618, "grad_norm": 2.1543166637420654, "learning_rate": 0.0001, "loss": 1.0157, "step": 2880 }, { "epoch": 0.15891281714066524, "grad_norm": 1.9687381982803345, "learning_rate": 0.0001, "loss": 1.0465, "step": 2900 }, { "epoch": 0.1600087676037043, "grad_norm": 2.868544816970825, "learning_rate": 0.0001, "loss": 0.9835, "step": 2920 }, { "epoch": 0.1611047180667434, "grad_norm": 2.3211984634399414, "learning_rate": 0.0001, "loss": 1.1204, "step": 2940 }, { "epoch": 0.16220066852978246, "grad_norm": 2.631458282470703, "learning_rate": 0.0001, "loss": 1.0175, "step": 2960 }, { "epoch": 0.16329661899282152, "grad_norm": 2.7994022369384766, "learning_rate": 0.0001, "loss": 1.0828, "step": 2980 }, { "epoch": 0.1643925694558606, "grad_norm": 2.051626443862915, "learning_rate": 0.0001, "loss": 0.97, "step": 3000 }, { "epoch": 0.1643925694558606, "eval_loss": 1.0072325468063354, "eval_runtime": 30710.9249, "eval_samples_per_second": 2.113, "eval_steps_per_second": 0.066, "eval_wer": 55.08434535201816, "step": 3000 }, { "epoch": 0.16548851991889968, "grad_norm": 2.6088364124298096, "learning_rate": 0.0001, "loss": 0.9803, "step": 3020 }, { "epoch": 0.16658447038193874, "grad_norm": 2.234034299850464, "learning_rate": 0.0001, "loss": 1.0757, "step": 3040 }, { "epoch": 0.1676804208449778, "grad_norm": 2.3472328186035156, "learning_rate": 0.0001, "loss": 0.9408, "step": 3060 }, { "epoch": 0.16877637130801687, "grad_norm": 2.5871200561523438, "learning_rate": 0.0001, "loss": 0.9269, "step": 3080 }, { "epoch": 0.16987232177105596, "grad_norm": 2.0150465965270996, "learning_rate": 0.0001, "loss": 1.0547, "step": 3100 }, { "epoch": 0.17096827223409503, "grad_norm": 2.5823395252227783, "learning_rate": 0.0001, "loss": 1.0559, "step": 3120 }, { "epoch": 0.1720642226971341, "grad_norm": 2.8252885341644287, "learning_rate": 0.0001, "loss": 1.1219, "step": 3140 }, { "epoch": 0.17316017316017315, "grad_norm": 2.1086535453796387, "learning_rate": 0.0001, "loss": 1.0089, "step": 3160 }, { "epoch": 0.17425612362321224, "grad_norm": 2.2288014888763428, "learning_rate": 0.0001, "loss": 1.136, "step": 3180 }, { "epoch": 0.1753520740862513, "grad_norm": 2.6622703075408936, "learning_rate": 0.0001, "loss": 1.0395, "step": 3200 }, { "epoch": 0.17644802454929037, "grad_norm": 1.9478541612625122, "learning_rate": 0.0001, "loss": 1.0658, "step": 3220 }, { "epoch": 0.17754397501232944, "grad_norm": 2.55828857421875, "learning_rate": 0.0001, "loss": 0.9904, "step": 3240 }, { "epoch": 0.1786399254753685, "grad_norm": 2.533651828765869, "learning_rate": 0.0001, "loss": 0.9733, "step": 3260 }, { "epoch": 0.1797358759384076, "grad_norm": 1.8745101690292358, "learning_rate": 0.0001, "loss": 0.9903, "step": 3280 }, { "epoch": 0.18083182640144665, "grad_norm": 1.8459206819534302, "learning_rate": 0.0001, "loss": 0.9095, "step": 3300 }, { "epoch": 0.18192777686448572, "grad_norm": 2.6654012203216553, "learning_rate": 0.0001, "loss": 0.9854, "step": 3320 }, { "epoch": 0.18302372732752478, "grad_norm": 2.6444480419158936, "learning_rate": 0.0001, "loss": 0.8857, "step": 3340 }, { "epoch": 0.18411967779056387, "grad_norm": 2.190462827682495, "learning_rate": 0.0001, "loss": 0.9375, "step": 3360 }, { "epoch": 0.18521562825360294, "grad_norm": 2.8208882808685303, "learning_rate": 0.0001, "loss": 0.9646, "step": 3380 }, { "epoch": 0.186311578716642, "grad_norm": 2.4978795051574707, "learning_rate": 0.0001, "loss": 0.9724, "step": 3400 }, { "epoch": 0.18740752917968106, "grad_norm": 2.4202938079833984, "learning_rate": 0.0001, "loss": 0.9659, "step": 3420 }, { "epoch": 0.18850347964272016, "grad_norm": 1.9026118516921997, "learning_rate": 0.0001, "loss": 1.0321, "step": 3440 }, { "epoch": 0.18959943010575922, "grad_norm": 2.6031651496887207, "learning_rate": 0.0001, "loss": 0.9622, "step": 3460 }, { "epoch": 0.19069538056879828, "grad_norm": 1.962509274482727, "learning_rate": 0.0001, "loss": 1.0262, "step": 3480 }, { "epoch": 0.19179133103183735, "grad_norm": 2.794633626937866, "learning_rate": 0.0001, "loss": 1.0626, "step": 3500 }, { "epoch": 0.19288728149487644, "grad_norm": 2.4276185035705566, "learning_rate": 0.0001, "loss": 0.9961, "step": 3520 }, { "epoch": 0.1939832319579155, "grad_norm": 2.0747737884521484, "learning_rate": 0.0001, "loss": 0.8945, "step": 3540 }, { "epoch": 0.19507918242095457, "grad_norm": 1.9151681661605835, "learning_rate": 0.0001, "loss": 1.0664, "step": 3560 }, { "epoch": 0.19617513288399363, "grad_norm": 2.11547589302063, "learning_rate": 0.0001, "loss": 0.9865, "step": 3580 }, { "epoch": 0.19727108334703272, "grad_norm": 2.359848737716675, "learning_rate": 0.0001, "loss": 0.95, "step": 3600 }, { "epoch": 0.19836703381007179, "grad_norm": 1.9854378700256348, "learning_rate": 0.0001, "loss": 0.9992, "step": 3620 }, { "epoch": 0.19946298427311085, "grad_norm": 2.476423978805542, "learning_rate": 0.0001, "loss": 0.9097, "step": 3640 }, { "epoch": 0.2005589347361499, "grad_norm": 2.420011281967163, "learning_rate": 0.0001, "loss": 1.0167, "step": 3660 }, { "epoch": 0.201654885199189, "grad_norm": 2.12312388420105, "learning_rate": 0.0001, "loss": 0.9298, "step": 3680 }, { "epoch": 0.20275083566222807, "grad_norm": 1.9679986238479614, "learning_rate": 0.0001, "loss": 1.0064, "step": 3700 }, { "epoch": 0.20384678612526713, "grad_norm": 2.608135461807251, "learning_rate": 0.0001, "loss": 0.9396, "step": 3720 }, { "epoch": 0.2049427365883062, "grad_norm": 2.542102098464966, "learning_rate": 0.0001, "loss": 1.0868, "step": 3740 }, { "epoch": 0.2060386870513453, "grad_norm": 2.5252091884613037, "learning_rate": 0.0001, "loss": 1.0417, "step": 3760 }, { "epoch": 0.20713463751438435, "grad_norm": 1.98774254322052, "learning_rate": 0.0001, "loss": 0.9949, "step": 3780 }, { "epoch": 0.20823058797742341, "grad_norm": 1.9502965211868286, "learning_rate": 0.0001, "loss": 0.9862, "step": 3800 }, { "epoch": 0.20932653844046248, "grad_norm": 2.2537944316864014, "learning_rate": 0.0001, "loss": 0.9087, "step": 3820 }, { "epoch": 0.21042248890350157, "grad_norm": 2.2866523265838623, "learning_rate": 0.0001, "loss": 1.0128, "step": 3840 }, { "epoch": 0.21151843936654063, "grad_norm": 2.2907001972198486, "learning_rate": 0.0001, "loss": 0.9654, "step": 3860 }, { "epoch": 0.2126143898295797, "grad_norm": 2.5648560523986816, "learning_rate": 0.0001, "loss": 1.0269, "step": 3880 }, { "epoch": 0.21371034029261876, "grad_norm": 2.198974847793579, "learning_rate": 0.0001, "loss": 0.9823, "step": 3900 }, { "epoch": 0.21480629075565785, "grad_norm": 2.1045591831207275, "learning_rate": 0.0001, "loss": 0.9139, "step": 3920 }, { "epoch": 0.21590224121869692, "grad_norm": 2.1462857723236084, "learning_rate": 0.0001, "loss": 0.9406, "step": 3940 }, { "epoch": 0.21699819168173598, "grad_norm": 2.3216285705566406, "learning_rate": 0.0001, "loss": 0.8597, "step": 3960 }, { "epoch": 0.21809414214477504, "grad_norm": 1.867150068283081, "learning_rate": 0.0001, "loss": 0.9776, "step": 3980 }, { "epoch": 0.21919009260781414, "grad_norm": 2.3432791233062744, "learning_rate": 0.0001, "loss": 0.9546, "step": 4000 }, { "epoch": 0.21919009260781414, "eval_loss": 0.9323587417602539, "eval_runtime": 30935.2713, "eval_samples_per_second": 2.098, "eval_steps_per_second": 0.066, "eval_wer": 63.836951720973865, "step": 4000 }, { "epoch": 0.2202860430708532, "grad_norm": 1.9426536560058594, "learning_rate": 0.0001, "loss": 0.9291, "step": 4020 }, { "epoch": 0.22138199353389226, "grad_norm": 2.693723201751709, "learning_rate": 0.0001, "loss": 0.9072, "step": 4040 }, { "epoch": 0.22247794399693133, "grad_norm": 2.237900972366333, "learning_rate": 0.0001, "loss": 0.8571, "step": 4060 }, { "epoch": 0.22357389445997042, "grad_norm": 2.739129066467285, "learning_rate": 0.0001, "loss": 0.9132, "step": 4080 }, { "epoch": 0.22466984492300948, "grad_norm": 1.886438012123108, "learning_rate": 0.0001, "loss": 0.9646, "step": 4100 }, { "epoch": 0.22576579538604855, "grad_norm": 2.3505897521972656, "learning_rate": 0.0001, "loss": 1.0479, "step": 4120 }, { "epoch": 0.2268617458490876, "grad_norm": 2.4302868843078613, "learning_rate": 0.0001, "loss": 0.9956, "step": 4140 }, { "epoch": 0.2279576963121267, "grad_norm": 2.2747528553009033, "learning_rate": 0.0001, "loss": 0.9621, "step": 4160 }, { "epoch": 0.22905364677516576, "grad_norm": 2.312248945236206, "learning_rate": 0.0001, "loss": 0.9292, "step": 4180 }, { "epoch": 0.23014959723820483, "grad_norm": 2.0439066886901855, "learning_rate": 0.0001, "loss": 0.8804, "step": 4200 }, { "epoch": 0.2312455477012439, "grad_norm": 2.615898609161377, "learning_rate": 0.0001, "loss": 0.9302, "step": 4220 }, { "epoch": 0.23234149816428298, "grad_norm": 2.306796073913574, "learning_rate": 0.0001, "loss": 1.0401, "step": 4240 }, { "epoch": 0.23343744862732205, "grad_norm": 2.4527432918548584, "learning_rate": 0.0001, "loss": 0.9195, "step": 4260 }, { "epoch": 0.2345333990903611, "grad_norm": 1.8589290380477905, "learning_rate": 0.0001, "loss": 0.9284, "step": 4280 }, { "epoch": 0.23562934955340017, "grad_norm": 1.8492025136947632, "learning_rate": 0.0001, "loss": 0.8898, "step": 4300 }, { "epoch": 0.23672530001643927, "grad_norm": 2.574871063232422, "learning_rate": 0.0001, "loss": 1.0026, "step": 4320 }, { "epoch": 0.23782125047947833, "grad_norm": 2.2600936889648438, "learning_rate": 0.0001, "loss": 1.0738, "step": 4340 }, { "epoch": 0.2389172009425174, "grad_norm": 2.35066556930542, "learning_rate": 0.0001, "loss": 0.8573, "step": 4360 }, { "epoch": 0.24001315140555646, "grad_norm": 2.165745496749878, "learning_rate": 0.0001, "loss": 0.8989, "step": 4380 }, { "epoch": 0.24110910186859555, "grad_norm": 2.1494085788726807, "learning_rate": 0.0001, "loss": 0.8292, "step": 4400 }, { "epoch": 0.2422050523316346, "grad_norm": 2.185359239578247, "learning_rate": 0.0001, "loss": 0.8954, "step": 4420 }, { "epoch": 0.24330100279467368, "grad_norm": 2.193904161453247, "learning_rate": 0.0001, "loss": 0.8944, "step": 4440 }, { "epoch": 0.24439695325771274, "grad_norm": 2.1101438999176025, "learning_rate": 0.0001, "loss": 0.9059, "step": 4460 }, { "epoch": 0.24549290372075183, "grad_norm": 2.026642084121704, "learning_rate": 0.0001, "loss": 0.8978, "step": 4480 }, { "epoch": 0.2465888541837909, "grad_norm": 2.0481228828430176, "learning_rate": 0.0001, "loss": 0.8835, "step": 4500 }, { "epoch": 0.24768480464682996, "grad_norm": 2.201350688934326, "learning_rate": 0.0001, "loss": 0.9519, "step": 4520 }, { "epoch": 0.24878075510986902, "grad_norm": 1.852100133895874, "learning_rate": 0.0001, "loss": 0.8458, "step": 4540 }, { "epoch": 0.24987670557290811, "grad_norm": 2.1303794384002686, "learning_rate": 0.0001, "loss": 0.9092, "step": 4560 }, { "epoch": 0.25097265603594715, "grad_norm": 2.2715415954589844, "learning_rate": 0.0001, "loss": 0.8931, "step": 4580 }, { "epoch": 0.25206860649898627, "grad_norm": 2.091785192489624, "learning_rate": 0.0001, "loss": 0.8645, "step": 4600 }, { "epoch": 0.25316455696202533, "grad_norm": 2.108103036880493, "learning_rate": 0.0001, "loss": 0.8387, "step": 4620 }, { "epoch": 0.2542605074250644, "grad_norm": 2.083848237991333, "learning_rate": 0.0001, "loss": 0.8315, "step": 4640 }, { "epoch": 0.25535645788810346, "grad_norm": 1.570475459098816, "learning_rate": 0.0001, "loss": 0.9355, "step": 4660 }, { "epoch": 0.2564524083511425, "grad_norm": 1.90199875831604, "learning_rate": 0.0001, "loss": 0.8308, "step": 4680 }, { "epoch": 0.2575483588141816, "grad_norm": 2.1952812671661377, "learning_rate": 0.0001, "loss": 0.8618, "step": 4700 }, { "epoch": 0.25864430927722065, "grad_norm": 2.0530431270599365, "learning_rate": 0.0001, "loss": 0.7951, "step": 4720 }, { "epoch": 0.2597402597402597, "grad_norm": 2.202252149581909, "learning_rate": 0.0001, "loss": 0.8858, "step": 4740 }, { "epoch": 0.26083621020329883, "grad_norm": 1.9541796445846558, "learning_rate": 0.0001, "loss": 0.8466, "step": 4760 }, { "epoch": 0.2619321606663379, "grad_norm": 1.9440534114837646, "learning_rate": 0.0001, "loss": 0.8488, "step": 4780 }, { "epoch": 0.26302811112937696, "grad_norm": 2.569821834564209, "learning_rate": 0.0001, "loss": 0.963, "step": 4800 }, { "epoch": 0.264124061592416, "grad_norm": 1.8896031379699707, "learning_rate": 0.0001, "loss": 0.837, "step": 4820 }, { "epoch": 0.2652200120554551, "grad_norm": 1.9390859603881836, "learning_rate": 0.0001, "loss": 0.8855, "step": 4840 }, { "epoch": 0.26631596251849415, "grad_norm": 2.2261974811553955, "learning_rate": 0.0001, "loss": 0.8901, "step": 4860 }, { "epoch": 0.2674119129815332, "grad_norm": 2.0486056804656982, "learning_rate": 0.0001, "loss": 0.8073, "step": 4880 }, { "epoch": 0.2685078634445723, "grad_norm": 2.292015314102173, "learning_rate": 0.0001, "loss": 0.9492, "step": 4900 }, { "epoch": 0.2696038139076114, "grad_norm": 2.0762240886688232, "learning_rate": 0.0001, "loss": 0.8528, "step": 4920 }, { "epoch": 0.27069976437065046, "grad_norm": 1.870642066001892, "learning_rate": 0.0001, "loss": 0.9482, "step": 4940 }, { "epoch": 0.27179571483368953, "grad_norm": 2.436768054962158, "learning_rate": 0.0001, "loss": 0.9299, "step": 4960 }, { "epoch": 0.2728916652967286, "grad_norm": 2.505880832672119, "learning_rate": 0.0001, "loss": 0.9259, "step": 4980 }, { "epoch": 0.27398761575976766, "grad_norm": 1.717252492904663, "learning_rate": 0.0001, "loss": 0.8134, "step": 5000 }, { "epoch": 0.27398761575976766, "eval_loss": 0.8726964592933655, "eval_runtime": 30710.3822, "eval_samples_per_second": 2.113, "eval_steps_per_second": 0.066, "eval_wer": 52.213316533880224, "step": 5000 }, { "epoch": 0.2750835662228067, "grad_norm": 2.28765869140625, "learning_rate": 0.0001, "loss": 1.0229, "step": 5020 }, { "epoch": 0.2761795166858458, "grad_norm": 2.2264580726623535, "learning_rate": 0.0001, "loss": 0.8291, "step": 5040 }, { "epoch": 0.27727546714888485, "grad_norm": 1.9387757778167725, "learning_rate": 0.0001, "loss": 0.821, "step": 5060 }, { "epoch": 0.27837141761192397, "grad_norm": 2.8628933429718018, "learning_rate": 0.0001, "loss": 0.9521, "step": 5080 }, { "epoch": 0.27946736807496303, "grad_norm": 2.2691447734832764, "learning_rate": 0.0001, "loss": 0.8182, "step": 5100 }, { "epoch": 0.2805633185380021, "grad_norm": 1.9515260457992554, "learning_rate": 0.0001, "loss": 0.9342, "step": 5120 }, { "epoch": 0.28165926900104116, "grad_norm": 2.1714837551116943, "learning_rate": 0.0001, "loss": 0.9663, "step": 5140 }, { "epoch": 0.2827552194640802, "grad_norm": 2.0159664154052734, "learning_rate": 0.0001, "loss": 0.8294, "step": 5160 }, { "epoch": 0.2838511699271193, "grad_norm": 2.024634599685669, "learning_rate": 0.0001, "loss": 0.896, "step": 5180 }, { "epoch": 0.28494712039015835, "grad_norm": 2.0035595893859863, "learning_rate": 0.0001, "loss": 0.8446, "step": 5200 }, { "epoch": 0.2860430708531974, "grad_norm": 2.4142866134643555, "learning_rate": 0.0001, "loss": 0.8835, "step": 5220 }, { "epoch": 0.28713902131623653, "grad_norm": 2.070338010787964, "learning_rate": 0.0001, "loss": 0.8687, "step": 5240 }, { "epoch": 0.2882349717792756, "grad_norm": 1.9818578958511353, "learning_rate": 0.0001, "loss": 0.8296, "step": 5260 }, { "epoch": 0.28933092224231466, "grad_norm": 1.8923412561416626, "learning_rate": 0.0001, "loss": 0.8999, "step": 5280 }, { "epoch": 0.2904268727053537, "grad_norm": 2.200206995010376, "learning_rate": 0.0001, "loss": 0.8662, "step": 5300 }, { "epoch": 0.2915228231683928, "grad_norm": 1.982446551322937, "learning_rate": 0.0001, "loss": 0.8301, "step": 5320 }, { "epoch": 0.29261877363143185, "grad_norm": 1.934844732284546, "learning_rate": 0.0001, "loss": 0.8219, "step": 5340 }, { "epoch": 0.2937147240944709, "grad_norm": 2.2790510654449463, "learning_rate": 0.0001, "loss": 0.8666, "step": 5360 }, { "epoch": 0.29481067455751, "grad_norm": 1.771672248840332, "learning_rate": 0.0001, "loss": 0.843, "step": 5380 }, { "epoch": 0.2959066250205491, "grad_norm": 2.3459877967834473, "learning_rate": 0.0001, "loss": 0.8516, "step": 5400 }, { "epoch": 0.29700257548358816, "grad_norm": 2.156458854675293, "learning_rate": 0.0001, "loss": 0.8425, "step": 5420 }, { "epoch": 0.2980985259466272, "grad_norm": 1.9492950439453125, "learning_rate": 0.0001, "loss": 0.8445, "step": 5440 }, { "epoch": 0.2991944764096663, "grad_norm": 2.1061997413635254, "learning_rate": 0.0001, "loss": 0.8858, "step": 5460 }, { "epoch": 0.30029042687270535, "grad_norm": 2.3567299842834473, "learning_rate": 0.0001, "loss": 0.8376, "step": 5480 }, { "epoch": 0.3013863773357444, "grad_norm": 2.1302335262298584, "learning_rate": 0.0001, "loss": 0.8272, "step": 5500 }, { "epoch": 0.3024823277987835, "grad_norm": 2.2098424434661865, "learning_rate": 0.0001, "loss": 0.8742, "step": 5520 }, { "epoch": 0.30357827826182254, "grad_norm": 1.7558562755584717, "learning_rate": 0.0001, "loss": 0.8863, "step": 5540 }, { "epoch": 0.30467422872486166, "grad_norm": 1.8461397886276245, "learning_rate": 0.0001, "loss": 0.8792, "step": 5560 }, { "epoch": 0.3057701791879007, "grad_norm": 2.0006344318389893, "learning_rate": 0.0001, "loss": 0.8263, "step": 5580 }, { "epoch": 0.3068661296509398, "grad_norm": 1.6772565841674805, "learning_rate": 0.0001, "loss": 0.789, "step": 5600 }, { "epoch": 0.30796208011397885, "grad_norm": 1.9263228178024292, "learning_rate": 0.0001, "loss": 0.842, "step": 5620 }, { "epoch": 0.3090580305770179, "grad_norm": 1.8888592720031738, "learning_rate": 0.0001, "loss": 0.8475, "step": 5640 }, { "epoch": 0.310153981040057, "grad_norm": 2.2354602813720703, "learning_rate": 0.0001, "loss": 1.0036, "step": 5660 }, { "epoch": 0.31124993150309604, "grad_norm": 1.9634332656860352, "learning_rate": 0.0001, "loss": 0.8517, "step": 5680 }, { "epoch": 0.3123458819661351, "grad_norm": 2.348825216293335, "learning_rate": 0.0001, "loss": 0.8731, "step": 5700 }, { "epoch": 0.3134418324291742, "grad_norm": 2.487741708755493, "learning_rate": 0.0001, "loss": 0.8556, "step": 5720 }, { "epoch": 0.3145377828922133, "grad_norm": 1.999516248703003, "learning_rate": 0.0001, "loss": 0.7969, "step": 5740 }, { "epoch": 0.31563373335525235, "grad_norm": 1.9654616117477417, "learning_rate": 0.0001, "loss": 0.7843, "step": 5760 }, { "epoch": 0.3167296838182914, "grad_norm": 2.1070950031280518, "learning_rate": 0.0001, "loss": 0.8399, "step": 5780 }, { "epoch": 0.3178256342813305, "grad_norm": 2.257129192352295, "learning_rate": 0.0001, "loss": 0.8224, "step": 5800 }, { "epoch": 0.31892158474436955, "grad_norm": 1.8256118297576904, "learning_rate": 0.0001, "loss": 0.794, "step": 5820 }, { "epoch": 0.3200175352074086, "grad_norm": 1.8899625539779663, "learning_rate": 0.0001, "loss": 0.8614, "step": 5840 }, { "epoch": 0.3211134856704477, "grad_norm": 2.221484661102295, "learning_rate": 0.0001, "loss": 0.765, "step": 5860 }, { "epoch": 0.3222094361334868, "grad_norm": 1.796877384185791, "learning_rate": 0.0001, "loss": 0.8359, "step": 5880 }, { "epoch": 0.32330538659652586, "grad_norm": 1.7495447397232056, "learning_rate": 0.0001, "loss": 0.8688, "step": 5900 }, { "epoch": 0.3244013370595649, "grad_norm": 2.136664628982544, "learning_rate": 0.0001, "loss": 0.9163, "step": 5920 }, { "epoch": 0.325497287522604, "grad_norm": 1.8508238792419434, "learning_rate": 0.0001, "loss": 0.7975, "step": 5940 }, { "epoch": 0.32659323798564305, "grad_norm": 2.144523859024048, "learning_rate": 0.0001, "loss": 0.7749, "step": 5960 }, { "epoch": 0.3276891884486821, "grad_norm": 2.208815336227417, "learning_rate": 0.0001, "loss": 0.8148, "step": 5980 }, { "epoch": 0.3287851389117212, "grad_norm": 2.0617401599884033, "learning_rate": 0.0001, "loss": 0.8884, "step": 6000 }, { "epoch": 0.3287851389117212, "eval_loss": 0.8316722512245178, "eval_runtime": 30850.8589, "eval_samples_per_second": 2.103, "eval_steps_per_second": 0.066, "eval_wer": 45.9960352377659, "step": 6000 }, { "epoch": 0.32988108937476024, "grad_norm": 2.0406434535980225, "learning_rate": 0.0001, "loss": 0.8504, "step": 6020 }, { "epoch": 0.33097703983779936, "grad_norm": 2.1899139881134033, "learning_rate": 0.0001, "loss": 0.7782, "step": 6040 }, { "epoch": 0.3320729903008384, "grad_norm": 2.650421380996704, "learning_rate": 0.0001, "loss": 0.7823, "step": 6060 }, { "epoch": 0.3331689407638775, "grad_norm": 2.085683584213257, "learning_rate": 0.0001, "loss": 0.754, "step": 6080 }, { "epoch": 0.33426489122691655, "grad_norm": 2.1783502101898193, "learning_rate": 0.0001, "loss": 0.8819, "step": 6100 }, { "epoch": 0.3353608416899556, "grad_norm": 2.096208333969116, "learning_rate": 0.0001, "loss": 0.8702, "step": 6120 }, { "epoch": 0.3364567921529947, "grad_norm": 2.005629062652588, "learning_rate": 0.0001, "loss": 0.8827, "step": 6140 }, { "epoch": 0.33755274261603374, "grad_norm": 2.1545634269714355, "learning_rate": 0.0001, "loss": 0.8496, "step": 6160 }, { "epoch": 0.3386486930790728, "grad_norm": 1.8190851211547852, "learning_rate": 0.0001, "loss": 0.7622, "step": 6180 }, { "epoch": 0.3397446435421119, "grad_norm": 1.9555623531341553, "learning_rate": 0.0001, "loss": 0.8338, "step": 6200 }, { "epoch": 0.340840594005151, "grad_norm": 1.8530341386795044, "learning_rate": 0.0001, "loss": 0.8017, "step": 6220 }, { "epoch": 0.34193654446819005, "grad_norm": 1.8724114894866943, "learning_rate": 0.0001, "loss": 0.848, "step": 6240 }, { "epoch": 0.3430324949312291, "grad_norm": 1.8598796129226685, "learning_rate": 0.0001, "loss": 0.8074, "step": 6260 }, { "epoch": 0.3441284453942682, "grad_norm": 2.1442923545837402, "learning_rate": 0.0001, "loss": 0.8473, "step": 6280 }, { "epoch": 0.34522439585730724, "grad_norm": 2.3083174228668213, "learning_rate": 0.0001, "loss": 0.9016, "step": 6300 }, { "epoch": 0.3463203463203463, "grad_norm": 1.8194735050201416, "learning_rate": 0.0001, "loss": 0.8267, "step": 6320 }, { "epoch": 0.34741629678338537, "grad_norm": 2.063523054122925, "learning_rate": 0.0001, "loss": 0.7841, "step": 6340 }, { "epoch": 0.3485122472464245, "grad_norm": 2.17594051361084, "learning_rate": 0.0001, "loss": 0.8318, "step": 6360 }, { "epoch": 0.34960819770946355, "grad_norm": 1.665189504623413, "learning_rate": 0.0001, "loss": 0.7983, "step": 6380 }, { "epoch": 0.3507041481725026, "grad_norm": 2.2596445083618164, "learning_rate": 0.0001, "loss": 0.8421, "step": 6400 }, { "epoch": 0.3518000986355417, "grad_norm": 1.7096545696258545, "learning_rate": 0.0001, "loss": 0.889, "step": 6420 }, { "epoch": 0.35289604909858074, "grad_norm": 1.7475535869598389, "learning_rate": 0.0001, "loss": 0.8006, "step": 6440 }, { "epoch": 0.3539919995616198, "grad_norm": 1.8176007270812988, "learning_rate": 0.0001, "loss": 0.8632, "step": 6460 }, { "epoch": 0.35508795002465887, "grad_norm": 2.6806535720825195, "learning_rate": 0.0001, "loss": 0.8427, "step": 6480 }, { "epoch": 0.35618390048769794, "grad_norm": 2.094172477722168, "learning_rate": 0.0001, "loss": 0.7812, "step": 6500 }, { "epoch": 0.357279850950737, "grad_norm": 1.8341765403747559, "learning_rate": 0.0001, "loss": 0.8051, "step": 6520 }, { "epoch": 0.3583758014137761, "grad_norm": 2.2341349124908447, "learning_rate": 0.0001, "loss": 0.8001, "step": 6540 }, { "epoch": 0.3594717518768152, "grad_norm": 2.1017801761627197, "learning_rate": 0.0001, "loss": 0.8142, "step": 6560 }, { "epoch": 0.36056770233985425, "grad_norm": 1.9903994798660278, "learning_rate": 0.0001, "loss": 0.8117, "step": 6580 }, { "epoch": 0.3616636528028933, "grad_norm": 2.273465394973755, "learning_rate": 0.0001, "loss": 0.8864, "step": 6600 }, { "epoch": 0.3627596032659324, "grad_norm": 2.0767428874969482, "learning_rate": 0.0001, "loss": 0.7687, "step": 6620 }, { "epoch": 0.36385555372897144, "grad_norm": 2.559774398803711, "learning_rate": 0.0001, "loss": 0.8181, "step": 6640 }, { "epoch": 0.3649515041920105, "grad_norm": 2.1393582820892334, "learning_rate": 0.0001, "loss": 0.7936, "step": 6660 }, { "epoch": 0.36604745465504956, "grad_norm": 2.06675386428833, "learning_rate": 0.0001, "loss": 0.8263, "step": 6680 }, { "epoch": 0.3671434051180887, "grad_norm": 1.7674784660339355, "learning_rate": 0.0001, "loss": 0.7818, "step": 6700 }, { "epoch": 0.36823935558112775, "grad_norm": 1.765442132949829, "learning_rate": 0.0001, "loss": 0.8335, "step": 6720 }, { "epoch": 0.3693353060441668, "grad_norm": 2.044288158416748, "learning_rate": 0.0001, "loss": 0.8742, "step": 6740 }, { "epoch": 0.3704312565072059, "grad_norm": 1.9821726083755493, "learning_rate": 0.0001, "loss": 0.928, "step": 6760 }, { "epoch": 0.37152720697024494, "grad_norm": 2.0798370838165283, "learning_rate": 0.0001, "loss": 0.7627, "step": 6780 }, { "epoch": 0.372623157433284, "grad_norm": 1.6817582845687866, "learning_rate": 0.0001, "loss": 0.7985, "step": 6800 }, { "epoch": 0.37371910789632307, "grad_norm": 1.872247576713562, "learning_rate": 0.0001, "loss": 0.8102, "step": 6820 }, { "epoch": 0.37481505835936213, "grad_norm": 1.7761516571044922, "learning_rate": 0.0001, "loss": 0.8435, "step": 6840 }, { "epoch": 0.37591100882240125, "grad_norm": 1.739585518836975, "learning_rate": 0.0001, "loss": 0.8706, "step": 6860 }, { "epoch": 0.3770069592854403, "grad_norm": 2.0503687858581543, "learning_rate": 0.0001, "loss": 0.8354, "step": 6880 }, { "epoch": 0.3781029097484794, "grad_norm": 2.283393621444702, "learning_rate": 0.0001, "loss": 0.7476, "step": 6900 }, { "epoch": 0.37919886021151844, "grad_norm": 1.801018238067627, "learning_rate": 0.0001, "loss": 0.7817, "step": 6920 }, { "epoch": 0.3802948106745575, "grad_norm": 2.5343267917633057, "learning_rate": 0.0001, "loss": 0.7628, "step": 6940 }, { "epoch": 0.38139076113759657, "grad_norm": 2.010507822036743, "learning_rate": 0.0001, "loss": 0.7931, "step": 6960 }, { "epoch": 0.38248671160063563, "grad_norm": 1.7228796482086182, "learning_rate": 0.0001, "loss": 0.7517, "step": 6980 }, { "epoch": 0.3835826620636747, "grad_norm": 1.967822551727295, "learning_rate": 0.0001, "loss": 0.804, "step": 7000 }, { "epoch": 0.3835826620636747, "eval_loss": 0.7978512644767761, "eval_runtime": 30977.7517, "eval_samples_per_second": 2.095, "eval_steps_per_second": 0.065, "eval_wer": 61.261910549759826, "step": 7000 }, { "epoch": 0.3846786125267138, "grad_norm": 1.9999229907989502, "learning_rate": 0.0001, "loss": 0.7634, "step": 7020 }, { "epoch": 0.3857745629897529, "grad_norm": 1.956128716468811, "learning_rate": 0.0001, "loss": 0.8102, "step": 7040 }, { "epoch": 0.38687051345279194, "grad_norm": 2.0134966373443604, "learning_rate": 0.0001, "loss": 0.7957, "step": 7060 }, { "epoch": 0.387966463915831, "grad_norm": 2.0373167991638184, "learning_rate": 0.0001, "loss": 0.8251, "step": 7080 }, { "epoch": 0.38906241437887007, "grad_norm": 1.7772964239120483, "learning_rate": 0.0001, "loss": 0.8128, "step": 7100 }, { "epoch": 0.39015836484190913, "grad_norm": 1.7618379592895508, "learning_rate": 0.0001, "loss": 0.8345, "step": 7120 }, { "epoch": 0.3912543153049482, "grad_norm": 2.181671380996704, "learning_rate": 0.0001, "loss": 0.8345, "step": 7140 }, { "epoch": 0.39235026576798726, "grad_norm": 1.8794726133346558, "learning_rate": 0.0001, "loss": 0.7615, "step": 7160 }, { "epoch": 0.3934462162310264, "grad_norm": 1.9297798871994019, "learning_rate": 0.0001, "loss": 0.7618, "step": 7180 }, { "epoch": 0.39454216669406544, "grad_norm": 1.9441471099853516, "learning_rate": 0.0001, "loss": 0.859, "step": 7200 }, { "epoch": 0.3956381171571045, "grad_norm": 2.2561404705047607, "learning_rate": 0.0001, "loss": 0.7877, "step": 7220 }, { "epoch": 0.39673406762014357, "grad_norm": 1.8441416025161743, "learning_rate": 0.0001, "loss": 0.7734, "step": 7240 }, { "epoch": 0.39783001808318263, "grad_norm": 1.686120867729187, "learning_rate": 0.0001, "loss": 0.7066, "step": 7260 }, { "epoch": 0.3989259685462217, "grad_norm": 1.9456263780593872, "learning_rate": 0.0001, "loss": 0.7469, "step": 7280 }, { "epoch": 0.40002191900926076, "grad_norm": 1.9112725257873535, "learning_rate": 0.0001, "loss": 0.7607, "step": 7300 }, { "epoch": 0.4011178694722998, "grad_norm": 2.5668513774871826, "learning_rate": 0.0001, "loss": 0.7859, "step": 7320 }, { "epoch": 0.40221381993533895, "grad_norm": 1.9502942562103271, "learning_rate": 0.0001, "loss": 0.7607, "step": 7340 }, { "epoch": 0.403309770398378, "grad_norm": 1.6973525285720825, "learning_rate": 0.0001, "loss": 0.8313, "step": 7360 }, { "epoch": 0.4044057208614171, "grad_norm": 2.3962297439575195, "learning_rate": 0.0001, "loss": 0.7806, "step": 7380 }, { "epoch": 0.40550167132445614, "grad_norm": 1.887536883354187, "learning_rate": 0.0001, "loss": 0.7524, "step": 7400 }, { "epoch": 0.4065976217874952, "grad_norm": 1.999687910079956, "learning_rate": 0.0001, "loss": 0.7349, "step": 7420 }, { "epoch": 0.40769357225053426, "grad_norm": 1.7444576025009155, "learning_rate": 0.0001, "loss": 0.8156, "step": 7440 }, { "epoch": 0.40878952271357333, "grad_norm": 1.7175132036209106, "learning_rate": 0.0001, "loss": 0.7419, "step": 7460 }, { "epoch": 0.4098854731766124, "grad_norm": 2.23638653755188, "learning_rate": 0.0001, "loss": 0.666, "step": 7480 }, { "epoch": 0.4109814236396515, "grad_norm": 2.024102210998535, "learning_rate": 0.0001, "loss": 0.7541, "step": 7500 }, { "epoch": 0.4120773741026906, "grad_norm": 2.042541265487671, "learning_rate": 0.0001, "loss": 0.7915, "step": 7520 }, { "epoch": 0.41317332456572964, "grad_norm": 1.9140897989273071, "learning_rate": 0.0001, "loss": 0.8712, "step": 7540 }, { "epoch": 0.4142692750287687, "grad_norm": 1.8435416221618652, "learning_rate": 0.0001, "loss": 0.8241, "step": 7560 }, { "epoch": 0.41536522549180777, "grad_norm": 2.027944803237915, "learning_rate": 0.0001, "loss": 0.9422, "step": 7580 }, { "epoch": 0.41646117595484683, "grad_norm": 2.07381534576416, "learning_rate": 0.0001, "loss": 0.812, "step": 7600 }, { "epoch": 0.4175571264178859, "grad_norm": 1.9762136936187744, "learning_rate": 0.0001, "loss": 0.7852, "step": 7620 }, { "epoch": 0.41865307688092496, "grad_norm": 1.8222426176071167, "learning_rate": 0.0001, "loss": 0.752, "step": 7640 }, { "epoch": 0.4197490273439641, "grad_norm": 2.0519089698791504, "learning_rate": 0.0001, "loss": 0.8031, "step": 7660 }, { "epoch": 0.42084497780700314, "grad_norm": 1.8777110576629639, "learning_rate": 0.0001, "loss": 0.8173, "step": 7680 }, { "epoch": 0.4219409282700422, "grad_norm": 2.323411703109741, "learning_rate": 0.0001, "loss": 0.8479, "step": 7700 }, { "epoch": 0.42303687873308127, "grad_norm": 1.6403400897979736, "learning_rate": 0.0001, "loss": 0.7567, "step": 7720 }, { "epoch": 0.42413282919612033, "grad_norm": 1.6627925634384155, "learning_rate": 0.0001, "loss": 0.7734, "step": 7740 }, { "epoch": 0.4252287796591594, "grad_norm": 1.8771709203720093, "learning_rate": 0.0001, "loss": 0.7652, "step": 7760 }, { "epoch": 0.42632473012219846, "grad_norm": 1.9806597232818604, "learning_rate": 0.0001, "loss": 0.7699, "step": 7780 }, { "epoch": 0.4274206805852375, "grad_norm": 2.1376988887786865, "learning_rate": 0.0001, "loss": 0.7825, "step": 7800 }, { "epoch": 0.42851663104827664, "grad_norm": 1.5566449165344238, "learning_rate": 0.0001, "loss": 0.704, "step": 7820 }, { "epoch": 0.4296125815113157, "grad_norm": 2.1835947036743164, "learning_rate": 0.0001, "loss": 0.8101, "step": 7840 }, { "epoch": 0.43070853197435477, "grad_norm": 2.055119037628174, "learning_rate": 0.0001, "loss": 0.703, "step": 7860 }, { "epoch": 0.43180448243739383, "grad_norm": 1.9324967861175537, "learning_rate": 0.0001, "loss": 0.81, "step": 7880 }, { "epoch": 0.4329004329004329, "grad_norm": 2.1087846755981445, "learning_rate": 0.0001, "loss": 0.7676, "step": 7900 }, { "epoch": 0.43399638336347196, "grad_norm": 1.8521897792816162, "learning_rate": 0.0001, "loss": 0.7546, "step": 7920 }, { "epoch": 0.435092333826511, "grad_norm": 2.145947217941284, "learning_rate": 0.0001, "loss": 0.7992, "step": 7940 }, { "epoch": 0.4361882842895501, "grad_norm": 1.7739931344985962, "learning_rate": 0.0001, "loss": 0.7133, "step": 7960 }, { "epoch": 0.4372842347525892, "grad_norm": 1.6032921075820923, "learning_rate": 0.0001, "loss": 0.8207, "step": 7980 }, { "epoch": 0.43838018521562827, "grad_norm": 2.1895668506622314, "learning_rate": 0.0001, "loss": 0.7638, "step": 8000 }, { "epoch": 0.43838018521562827, "eval_loss": 0.770411491394043, "eval_runtime": 30675.7059, "eval_samples_per_second": 2.115, "eval_steps_per_second": 0.066, "eval_wer": 43.10069742838263, "step": 8000 } ], "logging_steps": 20, "max_steps": 54747, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.578723706312065e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }