{ "best_metric": 16.21523264881726, "best_model_checkpoint": "./whisper-large-v3-turbo/checkpoint-10000", "epoch": 3.461405330564209, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008653513326410523, "grad_norm": 9.31276798248291, "learning_rate": 5.000000000000001e-07, "loss": 0.6314, "step": 25 }, { "epoch": 0.017307026652821047, "grad_norm": 6.611477851867676, "learning_rate": 1.0000000000000002e-06, "loss": 0.4058, "step": 50 }, { "epoch": 0.02596053997923157, "grad_norm": 5.953363418579102, "learning_rate": 1.5e-06, "loss": 0.2556, "step": 75 }, { "epoch": 0.034614053305642094, "grad_norm": 4.594871520996094, "learning_rate": 2.0000000000000003e-06, "loss": 0.2411, "step": 100 }, { "epoch": 0.04326756663205261, "grad_norm": 5.638365268707275, "learning_rate": 2.5e-06, "loss": 0.2421, "step": 125 }, { "epoch": 0.05192107995846314, "grad_norm": 6.280882835388184, "learning_rate": 3e-06, "loss": 0.245, "step": 150 }, { "epoch": 0.060574593284873655, "grad_norm": 4.423807144165039, "learning_rate": 3.5e-06, "loss": 0.2556, "step": 175 }, { "epoch": 0.06922810661128419, "grad_norm": 5.257762908935547, "learning_rate": 4.000000000000001e-06, "loss": 0.243, "step": 200 }, { "epoch": 0.0778816199376947, "grad_norm": 4.895700931549072, "learning_rate": 4.5e-06, "loss": 0.2607, "step": 225 }, { "epoch": 0.08653513326410522, "grad_norm": 5.383410453796387, "learning_rate": 5e-06, "loss": 0.2451, "step": 250 }, { "epoch": 0.09518864659051575, "grad_norm": 6.303346157073975, "learning_rate": 5.500000000000001e-06, "loss": 0.2316, "step": 275 }, { "epoch": 0.10384215991692627, "grad_norm": 3.834745168685913, "learning_rate": 6e-06, "loss": 0.2511, "step": 300 }, { "epoch": 0.1124956732433368, "grad_norm": 4.793943405151367, "learning_rate": 6.5000000000000004e-06, "loss": 0.258, "step": 325 }, { "epoch": 0.12114918656974731, "grad_norm": 4.196424961090088, "learning_rate": 7e-06, "loss": 0.2635, "step": 350 }, { "epoch": 0.12980269989615784, "grad_norm": 5.759880065917969, "learning_rate": 7.500000000000001e-06, "loss": 0.2644, "step": 375 }, { "epoch": 0.13845621322256838, "grad_norm": 4.871682167053223, "learning_rate": 8.000000000000001e-06, "loss": 0.2513, "step": 400 }, { "epoch": 0.1471097265489789, "grad_norm": 4.624505996704102, "learning_rate": 8.5e-06, "loss": 0.2601, "step": 425 }, { "epoch": 0.1557632398753894, "grad_norm": 5.247982501983643, "learning_rate": 9e-06, "loss": 0.254, "step": 450 }, { "epoch": 0.16441675320179994, "grad_norm": 5.218228816986084, "learning_rate": 9.5e-06, "loss": 0.2717, "step": 475 }, { "epoch": 0.17307026652821045, "grad_norm": 5.001543998718262, "learning_rate": 1e-05, "loss": 0.2829, "step": 500 }, { "epoch": 0.17307026652821045, "eval_loss": 0.27819713950157166, "eval_runtime": 8630.7687, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 23.738844120960056, "step": 500 }, { "epoch": 0.181723779854621, "grad_norm": 5.3592023849487305, "learning_rate": 9.973684210526316e-06, "loss": 0.2837, "step": 525 }, { "epoch": 0.1903772931810315, "grad_norm": 5.0274658203125, "learning_rate": 9.947368421052632e-06, "loss": 0.2873, "step": 550 }, { "epoch": 0.199030806507442, "grad_norm": 6.059903144836426, "learning_rate": 9.921052631578947e-06, "loss": 0.2927, "step": 575 }, { "epoch": 0.20768431983385255, "grad_norm": 7.239508152008057, "learning_rate": 9.894736842105264e-06, "loss": 0.2662, "step": 600 }, { "epoch": 0.21633783316026306, "grad_norm": 5.860602855682373, "learning_rate": 9.868421052631579e-06, "loss": 0.2847, "step": 625 }, { "epoch": 0.2249913464866736, "grad_norm": 5.402172565460205, "learning_rate": 9.842105263157896e-06, "loss": 0.2653, "step": 650 }, { "epoch": 0.2336448598130841, "grad_norm": 5.541703224182129, "learning_rate": 9.815789473684212e-06, "loss": 0.2994, "step": 675 }, { "epoch": 0.24229837313949462, "grad_norm": 4.814186096191406, "learning_rate": 9.789473684210527e-06, "loss": 0.2576, "step": 700 }, { "epoch": 0.25095188646590516, "grad_norm": 4.134284496307373, "learning_rate": 9.763157894736844e-06, "loss": 0.2788, "step": 725 }, { "epoch": 0.25960539979231567, "grad_norm": 5.382356643676758, "learning_rate": 9.736842105263159e-06, "loss": 0.2902, "step": 750 }, { "epoch": 0.2682589131187262, "grad_norm": 4.981515884399414, "learning_rate": 9.710526315789474e-06, "loss": 0.271, "step": 775 }, { "epoch": 0.27691242644513675, "grad_norm": 4.840052127838135, "learning_rate": 9.68421052631579e-06, "loss": 0.2717, "step": 800 }, { "epoch": 0.28556593977154726, "grad_norm": 4.619823932647705, "learning_rate": 9.657894736842106e-06, "loss": 0.2763, "step": 825 }, { "epoch": 0.2942194530979578, "grad_norm": 5.049735069274902, "learning_rate": 9.631578947368422e-06, "loss": 0.2709, "step": 850 }, { "epoch": 0.3028729664243683, "grad_norm": 4.263411045074463, "learning_rate": 9.605263157894737e-06, "loss": 0.2575, "step": 875 }, { "epoch": 0.3115264797507788, "grad_norm": 5.51076078414917, "learning_rate": 9.578947368421054e-06, "loss": 0.2775, "step": 900 }, { "epoch": 0.32017999307718936, "grad_norm": 3.7715821266174316, "learning_rate": 9.552631578947369e-06, "loss": 0.2767, "step": 925 }, { "epoch": 0.3288335064035999, "grad_norm": 3.964357852935791, "learning_rate": 9.526315789473684e-06, "loss": 0.2593, "step": 950 }, { "epoch": 0.3374870197300104, "grad_norm": 4.967723369598389, "learning_rate": 9.5e-06, "loss": 0.2445, "step": 975 }, { "epoch": 0.3461405330564209, "grad_norm": 6.19343376159668, "learning_rate": 9.473684210526315e-06, "loss": 0.2671, "step": 1000 }, { "epoch": 0.3461405330564209, "eval_loss": 0.26496633887290955, "eval_runtime": 8635.3808, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 22.229442855905035, "step": 1000 }, { "epoch": 0.3547940463828314, "grad_norm": 4.938564777374268, "learning_rate": 9.447368421052632e-06, "loss": 0.261, "step": 1025 }, { "epoch": 0.363447559709242, "grad_norm": 4.535635948181152, "learning_rate": 9.421052631578949e-06, "loss": 0.2471, "step": 1050 }, { "epoch": 0.3721010730356525, "grad_norm": 4.910510540008545, "learning_rate": 9.394736842105264e-06, "loss": 0.2701, "step": 1075 }, { "epoch": 0.380754586362063, "grad_norm": 4.105949878692627, "learning_rate": 9.36842105263158e-06, "loss": 0.2342, "step": 1100 }, { "epoch": 0.3894080996884735, "grad_norm": 4.819608211517334, "learning_rate": 9.342105263157895e-06, "loss": 0.2704, "step": 1125 }, { "epoch": 0.398061613014884, "grad_norm": 6.137063503265381, "learning_rate": 9.315789473684212e-06, "loss": 0.258, "step": 1150 }, { "epoch": 0.4067151263412946, "grad_norm": 4.703615665435791, "learning_rate": 9.289473684210527e-06, "loss": 0.2602, "step": 1175 }, { "epoch": 0.4153686396677051, "grad_norm": 4.942866325378418, "learning_rate": 9.263157894736842e-06, "loss": 0.2562, "step": 1200 }, { "epoch": 0.4240221529941156, "grad_norm": 4.163381099700928, "learning_rate": 9.236842105263159e-06, "loss": 0.2398, "step": 1225 }, { "epoch": 0.4326756663205261, "grad_norm": 4.933504104614258, "learning_rate": 9.210526315789474e-06, "loss": 0.2423, "step": 1250 }, { "epoch": 0.44132917964693663, "grad_norm": 4.699647426605225, "learning_rate": 9.18421052631579e-06, "loss": 0.2659, "step": 1275 }, { "epoch": 0.4499826929733472, "grad_norm": 5.076835632324219, "learning_rate": 9.157894736842105e-06, "loss": 0.2679, "step": 1300 }, { "epoch": 0.4586362062997577, "grad_norm": 4.333568572998047, "learning_rate": 9.131578947368422e-06, "loss": 0.2475, "step": 1325 }, { "epoch": 0.4672897196261682, "grad_norm": 4.654094219207764, "learning_rate": 9.105263157894739e-06, "loss": 0.2353, "step": 1350 }, { "epoch": 0.47594323295257873, "grad_norm": 3.9147582054138184, "learning_rate": 9.078947368421054e-06, "loss": 0.232, "step": 1375 }, { "epoch": 0.48459674627898924, "grad_norm": 3.9528894424438477, "learning_rate": 9.05263157894737e-06, "loss": 0.2312, "step": 1400 }, { "epoch": 0.4932502596053998, "grad_norm": 5.073605060577393, "learning_rate": 9.026315789473685e-06, "loss": 0.2529, "step": 1425 }, { "epoch": 0.5019037729318103, "grad_norm": 4.176553249359131, "learning_rate": 9e-06, "loss": 0.2459, "step": 1450 }, { "epoch": 0.5105572862582208, "grad_norm": 3.9072072505950928, "learning_rate": 8.973684210526317e-06, "loss": 0.2647, "step": 1475 }, { "epoch": 0.5192107995846313, "grad_norm": 5.062324523925781, "learning_rate": 8.947368421052632e-06, "loss": 0.2549, "step": 1500 }, { "epoch": 0.5192107995846313, "eval_loss": 0.24746711552143097, "eval_runtime": 8673.5525, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 21.05710077116368, "step": 1500 }, { "epoch": 0.5278643129110419, "grad_norm": 3.6110143661499023, "learning_rate": 8.921052631578949e-06, "loss": 0.2335, "step": 1525 }, { "epoch": 0.5365178262374524, "grad_norm": 5.8853607177734375, "learning_rate": 8.894736842105264e-06, "loss": 0.2516, "step": 1550 }, { "epoch": 0.5451713395638629, "grad_norm": 5.245302200317383, "learning_rate": 8.86842105263158e-06, "loss": 0.2456, "step": 1575 }, { "epoch": 0.5538248528902735, "grad_norm": 3.9259748458862305, "learning_rate": 8.842105263157895e-06, "loss": 0.2426, "step": 1600 }, { "epoch": 0.562478366216684, "grad_norm": 5.401766300201416, "learning_rate": 8.81578947368421e-06, "loss": 0.2489, "step": 1625 }, { "epoch": 0.5711318795430945, "grad_norm": 3.4733078479766846, "learning_rate": 8.789473684210527e-06, "loss": 0.237, "step": 1650 }, { "epoch": 0.579785392869505, "grad_norm": 5.746425151824951, "learning_rate": 8.763157894736842e-06, "loss": 0.262, "step": 1675 }, { "epoch": 0.5884389061959155, "grad_norm": 4.111097812652588, "learning_rate": 8.736842105263158e-06, "loss": 0.2559, "step": 1700 }, { "epoch": 0.5970924195223261, "grad_norm": 3.773117780685425, "learning_rate": 8.710526315789475e-06, "loss": 0.2567, "step": 1725 }, { "epoch": 0.6057459328487366, "grad_norm": 3.213146209716797, "learning_rate": 8.68421052631579e-06, "loss": 0.2361, "step": 1750 }, { "epoch": 0.6143994461751471, "grad_norm": 3.5634965896606445, "learning_rate": 8.657894736842107e-06, "loss": 0.2632, "step": 1775 }, { "epoch": 0.6230529595015576, "grad_norm": 3.3568804264068604, "learning_rate": 8.631578947368422e-06, "loss": 0.2278, "step": 1800 }, { "epoch": 0.6317064728279681, "grad_norm": 3.8863000869750977, "learning_rate": 8.605263157894738e-06, "loss": 0.2336, "step": 1825 }, { "epoch": 0.6403599861543787, "grad_norm": 4.37355899810791, "learning_rate": 8.578947368421053e-06, "loss": 0.2435, "step": 1850 }, { "epoch": 0.6490134994807892, "grad_norm": 5.477795600891113, "learning_rate": 8.552631578947368e-06, "loss": 0.248, "step": 1875 }, { "epoch": 0.6576670128071997, "grad_norm": 5.682942867279053, "learning_rate": 8.526315789473685e-06, "loss": 0.2478, "step": 1900 }, { "epoch": 0.6663205261336103, "grad_norm": 4.837137222290039, "learning_rate": 8.5e-06, "loss": 0.226, "step": 1925 }, { "epoch": 0.6749740394600208, "grad_norm": 5.188834190368652, "learning_rate": 8.473684210526317e-06, "loss": 0.2294, "step": 1950 }, { "epoch": 0.6836275527864313, "grad_norm": 3.51971173286438, "learning_rate": 8.447368421052632e-06, "loss": 0.2357, "step": 1975 }, { "epoch": 0.6922810661128418, "grad_norm": 6.168539047241211, "learning_rate": 8.421052631578948e-06, "loss": 0.243, "step": 2000 }, { "epoch": 0.6922810661128418, "eval_loss": 0.23871001601219177, "eval_runtime": 8675.6286, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 20.804956242959882, "step": 2000 }, { "epoch": 0.7009345794392523, "grad_norm": 4.645936965942383, "learning_rate": 8.394736842105263e-06, "loss": 0.2265, "step": 2025 }, { "epoch": 0.7095880927656628, "grad_norm": 5.751936435699463, "learning_rate": 8.36842105263158e-06, "loss": 0.2491, "step": 2050 }, { "epoch": 0.7182416060920734, "grad_norm": 3.7281875610351562, "learning_rate": 8.342105263157897e-06, "loss": 0.2671, "step": 2075 }, { "epoch": 0.726895119418484, "grad_norm": 3.756186008453369, "learning_rate": 8.315789473684212e-06, "loss": 0.214, "step": 2100 }, { "epoch": 0.7355486327448945, "grad_norm": 4.607492923736572, "learning_rate": 8.289473684210526e-06, "loss": 0.251, "step": 2125 }, { "epoch": 0.744202146071305, "grad_norm": 6.176618576049805, "learning_rate": 8.263157894736843e-06, "loss": 0.2532, "step": 2150 }, { "epoch": 0.7528556593977155, "grad_norm": 5.2198166847229, "learning_rate": 8.236842105263158e-06, "loss": 0.2405, "step": 2175 }, { "epoch": 0.761509172724126, "grad_norm": 4.314031600952148, "learning_rate": 8.210526315789475e-06, "loss": 0.2287, "step": 2200 }, { "epoch": 0.7701626860505365, "grad_norm": 5.143173694610596, "learning_rate": 8.18421052631579e-06, "loss": 0.2285, "step": 2225 }, { "epoch": 0.778816199376947, "grad_norm": 7.833088397979736, "learning_rate": 8.157894736842106e-06, "loss": 0.2359, "step": 2250 }, { "epoch": 0.7874697127033575, "grad_norm": 4.4802703857421875, "learning_rate": 8.131578947368421e-06, "loss": 0.2377, "step": 2275 }, { "epoch": 0.796123226029768, "grad_norm": 4.503852367401123, "learning_rate": 8.105263157894736e-06, "loss": 0.2325, "step": 2300 }, { "epoch": 0.8047767393561787, "grad_norm": 4.415956020355225, "learning_rate": 8.078947368421053e-06, "loss": 0.2438, "step": 2325 }, { "epoch": 0.8134302526825892, "grad_norm": 6.339819431304932, "learning_rate": 8.052631578947368e-06, "loss": 0.2479, "step": 2350 }, { "epoch": 0.8220837660089997, "grad_norm": 4.9156813621521, "learning_rate": 8.026315789473685e-06, "loss": 0.2195, "step": 2375 }, { "epoch": 0.8307372793354102, "grad_norm": 5.688671112060547, "learning_rate": 8.000000000000001e-06, "loss": 0.2199, "step": 2400 }, { "epoch": 0.8393907926618207, "grad_norm": 4.447849750518799, "learning_rate": 7.973684210526316e-06, "loss": 0.2429, "step": 2425 }, { "epoch": 0.8480443059882312, "grad_norm": 3.792633295059204, "learning_rate": 7.947368421052633e-06, "loss": 0.2189, "step": 2450 }, { "epoch": 0.8566978193146417, "grad_norm": 4.0045247077941895, "learning_rate": 7.921052631578948e-06, "loss": 0.2296, "step": 2475 }, { "epoch": 0.8653513326410522, "grad_norm": 4.449003219604492, "learning_rate": 7.894736842105265e-06, "loss": 0.2136, "step": 2500 }, { "epoch": 0.8653513326410522, "eval_loss": 0.23298430442810059, "eval_runtime": 8676.1006, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 20.03725846980331, "step": 2500 }, { "epoch": 0.8740048459674628, "grad_norm": 4.327373027801514, "learning_rate": 7.86842105263158e-06, "loss": 0.227, "step": 2525 }, { "epoch": 0.8826583592938733, "grad_norm": 4.755936145782471, "learning_rate": 7.842105263157895e-06, "loss": 0.2291, "step": 2550 }, { "epoch": 0.8913118726202839, "grad_norm": 4.75525426864624, "learning_rate": 7.815789473684211e-06, "loss": 0.2418, "step": 2575 }, { "epoch": 0.8999653859466944, "grad_norm": 4.342800140380859, "learning_rate": 7.789473684210526e-06, "loss": 0.2316, "step": 2600 }, { "epoch": 0.9086188992731049, "grad_norm": 4.322353363037109, "learning_rate": 7.763157894736843e-06, "loss": 0.2242, "step": 2625 }, { "epoch": 0.9172724125995154, "grad_norm": 4.406942367553711, "learning_rate": 7.736842105263158e-06, "loss": 0.2178, "step": 2650 }, { "epoch": 0.9259259259259259, "grad_norm": 5.0642266273498535, "learning_rate": 7.710526315789474e-06, "loss": 0.2335, "step": 2675 }, { "epoch": 0.9345794392523364, "grad_norm": 4.1676483154296875, "learning_rate": 7.68421052631579e-06, "loss": 0.226, "step": 2700 }, { "epoch": 0.943232952578747, "grad_norm": 4.0350022315979, "learning_rate": 7.657894736842106e-06, "loss": 0.2388, "step": 2725 }, { "epoch": 0.9518864659051575, "grad_norm": 4.125761032104492, "learning_rate": 7.631578947368423e-06, "loss": 0.2356, "step": 2750 }, { "epoch": 0.960539979231568, "grad_norm": 3.9152023792266846, "learning_rate": 7.605263157894738e-06, "loss": 0.2089, "step": 2775 }, { "epoch": 0.9691934925579785, "grad_norm": 4.8811821937561035, "learning_rate": 7.578947368421054e-06, "loss": 0.2059, "step": 2800 }, { "epoch": 0.9778470058843891, "grad_norm": 4.5911712646484375, "learning_rate": 7.552631578947369e-06, "loss": 0.2155, "step": 2825 }, { "epoch": 0.9865005192107996, "grad_norm": 4.353863716125488, "learning_rate": 7.526315789473685e-06, "loss": 0.2145, "step": 2850 }, { "epoch": 0.9951540325372101, "grad_norm": 5.159242153167725, "learning_rate": 7.500000000000001e-06, "loss": 0.2338, "step": 2875 }, { "epoch": 1.0038075458636206, "grad_norm": 3.813417673110962, "learning_rate": 7.473684210526316e-06, "loss": 0.1849, "step": 2900 }, { "epoch": 1.0124610591900312, "grad_norm": 3.838930368423462, "learning_rate": 7.447368421052632e-06, "loss": 0.1596, "step": 2925 }, { "epoch": 1.0211145725164417, "grad_norm": 3.80027174949646, "learning_rate": 7.421052631578948e-06, "loss": 0.184, "step": 2950 }, { "epoch": 1.0297680858428522, "grad_norm": 3.2930946350097656, "learning_rate": 7.3947368421052635e-06, "loss": 0.169, "step": 2975 }, { "epoch": 1.0384215991692627, "grad_norm": 3.8618459701538086, "learning_rate": 7.368421052631579e-06, "loss": 0.1664, "step": 3000 }, { "epoch": 1.0384215991692627, "eval_loss": 0.22383837401866913, "eval_runtime": 8630.3289, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 18.63530023394853, "step": 3000 }, { "epoch": 1.0470751124956732, "grad_norm": 4.719282627105713, "learning_rate": 7.342105263157895e-06, "loss": 0.1675, "step": 3025 }, { "epoch": 1.0557286258220837, "grad_norm": 2.7439825534820557, "learning_rate": 7.315789473684212e-06, "loss": 0.1656, "step": 3050 }, { "epoch": 1.0643821391484942, "grad_norm": 4.707197189331055, "learning_rate": 7.289473684210528e-06, "loss": 0.1743, "step": 3075 }, { "epoch": 1.0730356524749047, "grad_norm": 3.8877105712890625, "learning_rate": 7.263157894736843e-06, "loss": 0.1706, "step": 3100 }, { "epoch": 1.0816891658013152, "grad_norm": 3.034952402114868, "learning_rate": 7.236842105263158e-06, "loss": 0.1575, "step": 3125 }, { "epoch": 1.0903426791277258, "grad_norm": 3.1362013816833496, "learning_rate": 7.210526315789474e-06, "loss": 0.1624, "step": 3150 }, { "epoch": 1.0989961924541363, "grad_norm": 3.822435140609741, "learning_rate": 7.18421052631579e-06, "loss": 0.1575, "step": 3175 }, { "epoch": 1.107649705780547, "grad_norm": 3.342021942138672, "learning_rate": 7.157894736842106e-06, "loss": 0.1456, "step": 3200 }, { "epoch": 1.1163032191069575, "grad_norm": 2.8061094284057617, "learning_rate": 7.131578947368422e-06, "loss": 0.1573, "step": 3225 }, { "epoch": 1.124956732433368, "grad_norm": 4.738641738891602, "learning_rate": 7.1052631578947375e-06, "loss": 0.1753, "step": 3250 }, { "epoch": 1.1336102457597785, "grad_norm": 2.7924444675445557, "learning_rate": 7.078947368421053e-06, "loss": 0.1542, "step": 3275 }, { "epoch": 1.142263759086189, "grad_norm": 3.8055057525634766, "learning_rate": 7.052631578947369e-06, "loss": 0.1683, "step": 3300 }, { "epoch": 1.1509172724125996, "grad_norm": 2.7615177631378174, "learning_rate": 7.026315789473684e-06, "loss": 0.1607, "step": 3325 }, { "epoch": 1.15957078573901, "grad_norm": 3.5338289737701416, "learning_rate": 7e-06, "loss": 0.1818, "step": 3350 }, { "epoch": 1.1682242990654206, "grad_norm": 4.972025394439697, "learning_rate": 6.973684210526316e-06, "loss": 0.1683, "step": 3375 }, { "epoch": 1.176877812391831, "grad_norm": 2.7351698875427246, "learning_rate": 6.947368421052632e-06, "loss": 0.163, "step": 3400 }, { "epoch": 1.1855313257182416, "grad_norm": 2.600933074951172, "learning_rate": 6.921052631578948e-06, "loss": 0.1639, "step": 3425 }, { "epoch": 1.1941848390446521, "grad_norm": 3.196901798248291, "learning_rate": 6.894736842105264e-06, "loss": 0.1689, "step": 3450 }, { "epoch": 1.2028383523710626, "grad_norm": 4.408321380615234, "learning_rate": 6.86842105263158e-06, "loss": 0.1853, "step": 3475 }, { "epoch": 1.2114918656974731, "grad_norm": 3.1869866847991943, "learning_rate": 6.842105263157896e-06, "loss": 0.1781, "step": 3500 }, { "epoch": 1.2114918656974731, "eval_loss": 0.22067983448505402, "eval_runtime": 8630.1006, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 18.572913958929036, "step": 3500 }, { "epoch": 1.2201453790238836, "grad_norm": 3.745699882507324, "learning_rate": 6.8157894736842115e-06, "loss": 0.1685, "step": 3525 }, { "epoch": 1.2287988923502942, "grad_norm": 4.755461692810059, "learning_rate": 6.789473684210527e-06, "loss": 0.1653, "step": 3550 }, { "epoch": 1.2374524056767047, "grad_norm": 2.958872079849243, "learning_rate": 6.763157894736842e-06, "loss": 0.1657, "step": 3575 }, { "epoch": 1.2461059190031152, "grad_norm": 3.780946969985962, "learning_rate": 6.736842105263158e-06, "loss": 0.1818, "step": 3600 }, { "epoch": 1.254759432329526, "grad_norm": 3.9823403358459473, "learning_rate": 6.710526315789474e-06, "loss": 0.1705, "step": 3625 }, { "epoch": 1.2634129456559364, "grad_norm": 3.881185531616211, "learning_rate": 6.68421052631579e-06, "loss": 0.1688, "step": 3650 }, { "epoch": 1.272066458982347, "grad_norm": 3.2562785148620605, "learning_rate": 6.6578947368421055e-06, "loss": 0.1597, "step": 3675 }, { "epoch": 1.2807199723087574, "grad_norm": 4.002935886383057, "learning_rate": 6.631578947368421e-06, "loss": 0.1653, "step": 3700 }, { "epoch": 1.289373485635168, "grad_norm": 3.866936206817627, "learning_rate": 6.605263157894738e-06, "loss": 0.1687, "step": 3725 }, { "epoch": 1.2980269989615785, "grad_norm": 4.491256237030029, "learning_rate": 6.578947368421054e-06, "loss": 0.184, "step": 3750 }, { "epoch": 1.306680512287989, "grad_norm": 2.8679704666137695, "learning_rate": 6.55263157894737e-06, "loss": 0.1761, "step": 3775 }, { "epoch": 1.3153340256143995, "grad_norm": 3.8533244132995605, "learning_rate": 6.526315789473685e-06, "loss": 0.1612, "step": 3800 }, { "epoch": 1.32398753894081, "grad_norm": 3.4180614948272705, "learning_rate": 6.5000000000000004e-06, "loss": 0.1668, "step": 3825 }, { "epoch": 1.3326410522672205, "grad_norm": 3.1745965480804443, "learning_rate": 6.473684210526316e-06, "loss": 0.1571, "step": 3850 }, { "epoch": 1.341294565593631, "grad_norm": 3.310295343399048, "learning_rate": 6.447368421052632e-06, "loss": 0.1625, "step": 3875 }, { "epoch": 1.3499480789200415, "grad_norm": 3.5954184532165527, "learning_rate": 6.421052631578948e-06, "loss": 0.158, "step": 3900 }, { "epoch": 1.358601592246452, "grad_norm": 2.868551731109619, "learning_rate": 6.394736842105264e-06, "loss": 0.17, "step": 3925 }, { "epoch": 1.3672551055728626, "grad_norm": 2.9729490280151367, "learning_rate": 6.3684210526315795e-06, "loss": 0.1511, "step": 3950 }, { "epoch": 1.375908618899273, "grad_norm": 2.286844253540039, "learning_rate": 6.342105263157895e-06, "loss": 0.1648, "step": 3975 }, { "epoch": 1.3845621322256836, "grad_norm": 3.9818239212036133, "learning_rate": 6.31578947368421e-06, "loss": 0.1664, "step": 4000 }, { "epoch": 1.3845621322256836, "eval_loss": 0.21563765406608582, "eval_runtime": 8678.9976, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 18.037431765011696, "step": 4000 }, { "epoch": 1.393215645552094, "grad_norm": 3.8452024459838867, "learning_rate": 6.289473684210526e-06, "loss": 0.1642, "step": 4025 }, { "epoch": 1.4018691588785046, "grad_norm": 3.381753444671631, "learning_rate": 6.263157894736842e-06, "loss": 0.1671, "step": 4050 }, { "epoch": 1.4105226722049151, "grad_norm": 3.9922471046447754, "learning_rate": 6.236842105263159e-06, "loss": 0.1721, "step": 4075 }, { "epoch": 1.4191761855313256, "grad_norm": 3.2609457969665527, "learning_rate": 6.2105263157894745e-06, "loss": 0.1832, "step": 4100 }, { "epoch": 1.4278296988577361, "grad_norm": 3.5233139991760254, "learning_rate": 6.18421052631579e-06, "loss": 0.1734, "step": 4125 }, { "epoch": 1.4364832121841467, "grad_norm": 4.901401519775391, "learning_rate": 6.157894736842106e-06, "loss": 0.181, "step": 4150 }, { "epoch": 1.4451367255105572, "grad_norm": 2.4299676418304443, "learning_rate": 6.131578947368422e-06, "loss": 0.1538, "step": 4175 }, { "epoch": 1.4537902388369677, "grad_norm": 4.308781623840332, "learning_rate": 6.105263157894738e-06, "loss": 0.1501, "step": 4200 }, { "epoch": 1.4624437521633784, "grad_norm": 4.0135498046875, "learning_rate": 6.0789473684210535e-06, "loss": 0.1735, "step": 4225 }, { "epoch": 1.471097265489789, "grad_norm": 3.9877755641937256, "learning_rate": 6.0526315789473685e-06, "loss": 0.1717, "step": 4250 }, { "epoch": 1.4797507788161994, "grad_norm": 3.184150218963623, "learning_rate": 6.026315789473684e-06, "loss": 0.1571, "step": 4275 }, { "epoch": 1.48840429214261, "grad_norm": 3.2754974365234375, "learning_rate": 6e-06, "loss": 0.1618, "step": 4300 }, { "epoch": 1.4970578054690205, "grad_norm": 3.145984411239624, "learning_rate": 5.973684210526316e-06, "loss": 0.1637, "step": 4325 }, { "epoch": 1.505711318795431, "grad_norm": 4.307953834533691, "learning_rate": 5.947368421052632e-06, "loss": 0.1568, "step": 4350 }, { "epoch": 1.5143648321218415, "grad_norm": 2.7052788734436035, "learning_rate": 5.921052631578948e-06, "loss": 0.1573, "step": 4375 }, { "epoch": 1.523018345448252, "grad_norm": 4.613982677459717, "learning_rate": 5.8947368421052634e-06, "loss": 0.1531, "step": 4400 }, { "epoch": 1.5316718587746625, "grad_norm": 3.401477813720703, "learning_rate": 5.86842105263158e-06, "loss": 0.167, "step": 4425 }, { "epoch": 1.540325372101073, "grad_norm": 4.301424503326416, "learning_rate": 5.842105263157896e-06, "loss": 0.168, "step": 4450 }, { "epoch": 1.5489788854274835, "grad_norm": 4.266972541809082, "learning_rate": 5.815789473684212e-06, "loss": 0.1589, "step": 4475 }, { "epoch": 1.557632398753894, "grad_norm": 3.3040754795074463, "learning_rate": 5.789473684210527e-06, "loss": 0.1659, "step": 4500 }, { "epoch": 1.557632398753894, "eval_loss": 0.21191351115703583, "eval_runtime": 8655.7858, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.074, "eval_wer": 18.74360973919071, "step": 4500 }, { "epoch": 1.5662859120803048, "grad_norm": 4.775163173675537, "learning_rate": 5.7631578947368425e-06, "loss": 0.1749, "step": 4525 }, { "epoch": 1.5749394254067153, "grad_norm": 3.8686747550964355, "learning_rate": 5.736842105263158e-06, "loss": 0.1656, "step": 4550 }, { "epoch": 1.5835929387331258, "grad_norm": 3.2979884147644043, "learning_rate": 5.710526315789474e-06, "loss": 0.1652, "step": 4575 }, { "epoch": 1.5922464520595363, "grad_norm": 3.271785259246826, "learning_rate": 5.68421052631579e-06, "loss": 0.1611, "step": 4600 }, { "epoch": 1.6008999653859468, "grad_norm": 4.323774814605713, "learning_rate": 5.657894736842106e-06, "loss": 0.1713, "step": 4625 }, { "epoch": 1.6095534787123573, "grad_norm": 2.925485134124756, "learning_rate": 5.631578947368422e-06, "loss": 0.1634, "step": 4650 }, { "epoch": 1.6182069920387678, "grad_norm": 3.3976783752441406, "learning_rate": 5.605263157894737e-06, "loss": 0.1761, "step": 4675 }, { "epoch": 1.6268605053651783, "grad_norm": 3.1800551414489746, "learning_rate": 5.578947368421052e-06, "loss": 0.1522, "step": 4700 }, { "epoch": 1.6355140186915889, "grad_norm": 3.392937660217285, "learning_rate": 5.552631578947368e-06, "loss": 0.1604, "step": 4725 }, { "epoch": 1.6441675320179994, "grad_norm": 3.9035747051239014, "learning_rate": 5.526315789473685e-06, "loss": 0.1922, "step": 4750 }, { "epoch": 1.6528210453444099, "grad_norm": 3.81205415725708, "learning_rate": 5.500000000000001e-06, "loss": 0.1786, "step": 4775 }, { "epoch": 1.6614745586708204, "grad_norm": 3.3572874069213867, "learning_rate": 5.4736842105263165e-06, "loss": 0.169, "step": 4800 }, { "epoch": 1.670128071997231, "grad_norm": 3.0381922721862793, "learning_rate": 5.447368421052632e-06, "loss": 0.1753, "step": 4825 }, { "epoch": 1.6787815853236414, "grad_norm": 3.7208361625671387, "learning_rate": 5.421052631578948e-06, "loss": 0.1587, "step": 4850 }, { "epoch": 1.687435098650052, "grad_norm": 6.452873229980469, "learning_rate": 5.394736842105264e-06, "loss": 0.1559, "step": 4875 }, { "epoch": 1.6960886119764624, "grad_norm": 3.532186269760132, "learning_rate": 5.36842105263158e-06, "loss": 0.1587, "step": 4900 }, { "epoch": 1.704742125302873, "grad_norm": 3.6204092502593994, "learning_rate": 5.342105263157895e-06, "loss": 0.1638, "step": 4925 }, { "epoch": 1.7133956386292835, "grad_norm": 3.3600478172302246, "learning_rate": 5.315789473684211e-06, "loss": 0.1657, "step": 4950 }, { "epoch": 1.722049151955694, "grad_norm": 3.8117873668670654, "learning_rate": 5.289473684210526e-06, "loss": 0.1533, "step": 4975 }, { "epoch": 1.7307026652821045, "grad_norm": 4.345729827880859, "learning_rate": 5.263157894736842e-06, "loss": 0.1611, "step": 5000 }, { "epoch": 1.7307026652821045, "eval_loss": 0.20883877575397491, "eval_runtime": 8644.4172, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.074, "eval_wer": 17.726366865956155, "step": 5000 }, { "epoch": 1.739356178608515, "grad_norm": 2.709228515625, "learning_rate": 5.236842105263158e-06, "loss": 0.1723, "step": 5025 }, { "epoch": 1.7480096919349255, "grad_norm": 4.446653366088867, "learning_rate": 5.210526315789474e-06, "loss": 0.1606, "step": 5050 }, { "epoch": 1.756663205261336, "grad_norm": 4.571587562561035, "learning_rate": 5.18421052631579e-06, "loss": 0.1628, "step": 5075 }, { "epoch": 1.7653167185877465, "grad_norm": 3.951996088027954, "learning_rate": 5.157894736842106e-06, "loss": 0.1532, "step": 5100 }, { "epoch": 1.773970231914157, "grad_norm": 3.2565793991088867, "learning_rate": 5.131578947368422e-06, "loss": 0.1599, "step": 5125 }, { "epoch": 1.7826237452405675, "grad_norm": 2.625930070877075, "learning_rate": 5.105263157894738e-06, "loss": 0.1606, "step": 5150 }, { "epoch": 1.791277258566978, "grad_norm": 3.5779178142547607, "learning_rate": 5.078947368421053e-06, "loss": 0.1683, "step": 5175 }, { "epoch": 1.7999307718933886, "grad_norm": 3.518836736679077, "learning_rate": 5.052631578947369e-06, "loss": 0.1575, "step": 5200 }, { "epoch": 1.808584285219799, "grad_norm": 2.62227725982666, "learning_rate": 5.026315789473685e-06, "loss": 0.1549, "step": 5225 }, { "epoch": 1.8172377985462098, "grad_norm": 3.5382871627807617, "learning_rate": 5e-06, "loss": 0.1566, "step": 5250 }, { "epoch": 1.8258913118726203, "grad_norm": 4.410214900970459, "learning_rate": 4.973684210526316e-06, "loss": 0.1529, "step": 5275 }, { "epoch": 1.8345448251990308, "grad_norm": 3.1463205814361572, "learning_rate": 4.947368421052632e-06, "loss": 0.1551, "step": 5300 }, { "epoch": 1.8431983385254413, "grad_norm": 2.4352145195007324, "learning_rate": 4.921052631578948e-06, "loss": 0.1624, "step": 5325 }, { "epoch": 1.8518518518518519, "grad_norm": 3.8748574256896973, "learning_rate": 4.894736842105264e-06, "loss": 0.1619, "step": 5350 }, { "epoch": 1.8605053651782624, "grad_norm": 2.8592870235443115, "learning_rate": 4.8684210526315795e-06, "loss": 0.1709, "step": 5375 }, { "epoch": 1.8691588785046729, "grad_norm": 3.5654568672180176, "learning_rate": 4.842105263157895e-06, "loss": 0.1568, "step": 5400 }, { "epoch": 1.8778123918310834, "grad_norm": 3.1443722248077393, "learning_rate": 4.815789473684211e-06, "loss": 0.1546, "step": 5425 }, { "epoch": 1.886465905157494, "grad_norm": 2.727612018585205, "learning_rate": 4.789473684210527e-06, "loss": 0.1502, "step": 5450 }, { "epoch": 1.8951194184839044, "grad_norm": 3.5027356147766113, "learning_rate": 4.763157894736842e-06, "loss": 0.1545, "step": 5475 }, { "epoch": 1.9037729318103151, "grad_norm": 3.154855966567993, "learning_rate": 4.736842105263158e-06, "loss": 0.1424, "step": 5500 }, { "epoch": 1.9037729318103151, "eval_loss": 0.20275835692882538, "eval_runtime": 8641.5231, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.074, "eval_wer": 17.243739710597, "step": 5500 }, { "epoch": 1.9124264451367257, "grad_norm": 2.7067971229553223, "learning_rate": 4.710526315789474e-06, "loss": 0.1599, "step": 5525 }, { "epoch": 1.9210799584631362, "grad_norm": 3.4274163246154785, "learning_rate": 4.68421052631579e-06, "loss": 0.1596, "step": 5550 }, { "epoch": 1.9297334717895467, "grad_norm": 3.3891353607177734, "learning_rate": 4.657894736842106e-06, "loss": 0.1836, "step": 5575 }, { "epoch": 1.9383869851159572, "grad_norm": 3.259261131286621, "learning_rate": 4.631578947368421e-06, "loss": 0.1574, "step": 5600 }, { "epoch": 1.9470404984423677, "grad_norm": 4.355072021484375, "learning_rate": 4.605263157894737e-06, "loss": 0.151, "step": 5625 }, { "epoch": 1.9556940117687782, "grad_norm": 4.160757064819336, "learning_rate": 4.578947368421053e-06, "loss": 0.153, "step": 5650 }, { "epoch": 1.9643475250951887, "grad_norm": 2.7162065505981445, "learning_rate": 4.552631578947369e-06, "loss": 0.1504, "step": 5675 }, { "epoch": 1.9730010384215992, "grad_norm": 3.1264755725860596, "learning_rate": 4.526315789473685e-06, "loss": 0.1503, "step": 5700 }, { "epoch": 1.9816545517480098, "grad_norm": 3.2158703804016113, "learning_rate": 4.5e-06, "loss": 0.1629, "step": 5725 }, { "epoch": 1.9903080650744203, "grad_norm": 3.41349196434021, "learning_rate": 4.473684210526316e-06, "loss": 0.1628, "step": 5750 }, { "epoch": 1.9989615784008308, "grad_norm": 2.600003957748413, "learning_rate": 4.447368421052632e-06, "loss": 0.153, "step": 5775 }, { "epoch": 2.0076150917272413, "grad_norm": 2.955773115158081, "learning_rate": 4.4210526315789476e-06, "loss": 0.1056, "step": 5800 }, { "epoch": 2.016268605053652, "grad_norm": 3.6034035682678223, "learning_rate": 4.394736842105263e-06, "loss": 0.1, "step": 5825 }, { "epoch": 2.0249221183800623, "grad_norm": 2.37636137008667, "learning_rate": 4.368421052631579e-06, "loss": 0.1042, "step": 5850 }, { "epoch": 2.033575631706473, "grad_norm": 2.6915884017944336, "learning_rate": 4.342105263157895e-06, "loss": 0.1162, "step": 5875 }, { "epoch": 2.0422291450328833, "grad_norm": 2.495497226715088, "learning_rate": 4.315789473684211e-06, "loss": 0.1097, "step": 5900 }, { "epoch": 2.050882658359294, "grad_norm": 3.1484713554382324, "learning_rate": 4.289473684210527e-06, "loss": 0.1183, "step": 5925 }, { "epoch": 2.0595361716857044, "grad_norm": 2.547849416732788, "learning_rate": 4.2631578947368425e-06, "loss": 0.1205, "step": 5950 }, { "epoch": 2.068189685012115, "grad_norm": 2.342745304107666, "learning_rate": 4.236842105263158e-06, "loss": 0.1117, "step": 5975 }, { "epoch": 2.0768431983385254, "grad_norm": 2.926923990249634, "learning_rate": 4.210526315789474e-06, "loss": 0.1101, "step": 6000 }, { "epoch": 2.0768431983385254, "eval_loss": 0.20616546273231506, "eval_runtime": 8634.4104, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 16.82523178234122, "step": 6000 }, { "epoch": 2.085496711664936, "grad_norm": 2.461634635925293, "learning_rate": 4.18421052631579e-06, "loss": 0.1108, "step": 6025 }, { "epoch": 2.0941502249913464, "grad_norm": 1.6099869012832642, "learning_rate": 4.157894736842106e-06, "loss": 0.1091, "step": 6050 }, { "epoch": 2.102803738317757, "grad_norm": 2.497805595397949, "learning_rate": 4.1315789473684216e-06, "loss": 0.1054, "step": 6075 }, { "epoch": 2.1114572516441674, "grad_norm": 2.440737009048462, "learning_rate": 4.105263157894737e-06, "loss": 0.1143, "step": 6100 }, { "epoch": 2.120110764970578, "grad_norm": 2.547050714492798, "learning_rate": 4.078947368421053e-06, "loss": 0.1051, "step": 6125 }, { "epoch": 2.1287642782969884, "grad_norm": 2.2565364837646484, "learning_rate": 4.052631578947368e-06, "loss": 0.1079, "step": 6150 }, { "epoch": 2.137417791623399, "grad_norm": 3.4482452869415283, "learning_rate": 4.026315789473684e-06, "loss": 0.107, "step": 6175 }, { "epoch": 2.1460713049498095, "grad_norm": 1.6255193948745728, "learning_rate": 4.000000000000001e-06, "loss": 0.1124, "step": 6200 }, { "epoch": 2.15472481827622, "grad_norm": 2.6273090839385986, "learning_rate": 3.9736842105263165e-06, "loss": 0.1012, "step": 6225 }, { "epoch": 2.1633783316026305, "grad_norm": 4.822213649749756, "learning_rate": 3.947368421052632e-06, "loss": 0.106, "step": 6250 }, { "epoch": 2.172031844929041, "grad_norm": 3.0468506813049316, "learning_rate": 3.921052631578947e-06, "loss": 0.1343, "step": 6275 }, { "epoch": 2.1806853582554515, "grad_norm": 3.5357604026794434, "learning_rate": 3.894736842105263e-06, "loss": 0.1066, "step": 6300 }, { "epoch": 2.189338871581862, "grad_norm": 2.8175506591796875, "learning_rate": 3.868421052631579e-06, "loss": 0.1102, "step": 6325 }, { "epoch": 2.1979923849082725, "grad_norm": 3.171792984008789, "learning_rate": 3.842105263157895e-06, "loss": 0.1081, "step": 6350 }, { "epoch": 2.2066458982346835, "grad_norm": 2.2714669704437256, "learning_rate": 3.815789473684211e-06, "loss": 0.1077, "step": 6375 }, { "epoch": 2.215299411561094, "grad_norm": 4.731479644775391, "learning_rate": 3.789473684210527e-06, "loss": 0.1055, "step": 6400 }, { "epoch": 2.2239529248875045, "grad_norm": 2.8998143672943115, "learning_rate": 3.7631578947368426e-06, "loss": 0.1189, "step": 6425 }, { "epoch": 2.232606438213915, "grad_norm": 2.2706921100616455, "learning_rate": 3.736842105263158e-06, "loss": 0.1134, "step": 6450 }, { "epoch": 2.2412599515403255, "grad_norm": 3.229358196258545, "learning_rate": 3.710526315789474e-06, "loss": 0.1154, "step": 6475 }, { "epoch": 2.249913464866736, "grad_norm": 2.179197072982788, "learning_rate": 3.6842105263157896e-06, "loss": 0.0966, "step": 6500 }, { "epoch": 2.249913464866736, "eval_loss": 0.20438149571418762, "eval_runtime": 8615.3588, "eval_samples_per_second": 1.192, "eval_steps_per_second": 0.075, "eval_wer": 16.619876960402046, "step": 6500 }, { "epoch": 2.2585669781931466, "grad_norm": 2.860914707183838, "learning_rate": 3.657894736842106e-06, "loss": 0.1083, "step": 6525 }, { "epoch": 2.267220491519557, "grad_norm": 3.0490429401397705, "learning_rate": 3.6315789473684217e-06, "loss": 0.1068, "step": 6550 }, { "epoch": 2.2758740048459676, "grad_norm": 3.8441545963287354, "learning_rate": 3.605263157894737e-06, "loss": 0.1125, "step": 6575 }, { "epoch": 2.284527518172378, "grad_norm": 2.9149553775787354, "learning_rate": 3.578947368421053e-06, "loss": 0.1081, "step": 6600 }, { "epoch": 2.2931810314987886, "grad_norm": 3.6281797885894775, "learning_rate": 3.5526315789473687e-06, "loss": 0.1116, "step": 6625 }, { "epoch": 2.301834544825199, "grad_norm": 2.624938488006592, "learning_rate": 3.5263157894736846e-06, "loss": 0.1062, "step": 6650 }, { "epoch": 2.3104880581516096, "grad_norm": 3.465491533279419, "learning_rate": 3.5e-06, "loss": 0.128, "step": 6675 }, { "epoch": 2.31914157147802, "grad_norm": 3.024850606918335, "learning_rate": 3.473684210526316e-06, "loss": 0.1062, "step": 6700 }, { "epoch": 2.3277950848044306, "grad_norm": 3.088701009750366, "learning_rate": 3.447368421052632e-06, "loss": 0.1155, "step": 6725 }, { "epoch": 2.336448598130841, "grad_norm": 2.8708367347717285, "learning_rate": 3.421052631578948e-06, "loss": 0.1079, "step": 6750 }, { "epoch": 2.3451021114572517, "grad_norm": 1.946626901626587, "learning_rate": 3.3947368421052636e-06, "loss": 0.1072, "step": 6775 }, { "epoch": 2.353755624783662, "grad_norm": 3.3041462898254395, "learning_rate": 3.368421052631579e-06, "loss": 0.1104, "step": 6800 }, { "epoch": 2.3624091381100727, "grad_norm": 2.6233861446380615, "learning_rate": 3.342105263157895e-06, "loss": 0.1075, "step": 6825 }, { "epoch": 2.371062651436483, "grad_norm": 2.8356857299804688, "learning_rate": 3.3157894736842107e-06, "loss": 0.1058, "step": 6850 }, { "epoch": 2.3797161647628937, "grad_norm": 2.9162681102752686, "learning_rate": 3.289473684210527e-06, "loss": 0.122, "step": 6875 }, { "epoch": 2.3883696780893042, "grad_norm": 3.749187707901001, "learning_rate": 3.2631578947368423e-06, "loss": 0.1136, "step": 6900 }, { "epoch": 2.3970231914157147, "grad_norm": 2.6137099266052246, "learning_rate": 3.236842105263158e-06, "loss": 0.1087, "step": 6925 }, { "epoch": 2.4056767047421252, "grad_norm": 2.7214744091033936, "learning_rate": 3.210526315789474e-06, "loss": 0.103, "step": 6950 }, { "epoch": 2.4143302180685358, "grad_norm": 2.98718523979187, "learning_rate": 3.1842105263157898e-06, "loss": 0.1125, "step": 6975 }, { "epoch": 2.4229837313949463, "grad_norm": 2.834648609161377, "learning_rate": 3.157894736842105e-06, "loss": 0.1129, "step": 7000 }, { "epoch": 2.4229837313949463, "eval_loss": 0.20139536261558533, "eval_runtime": 8648.9738, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.074, "eval_wer": 17.110302400138636, "step": 7000 }, { "epoch": 2.431637244721357, "grad_norm": 3.4440181255340576, "learning_rate": 3.131578947368421e-06, "loss": 0.1141, "step": 7025 }, { "epoch": 2.4402907580477673, "grad_norm": 3.478074550628662, "learning_rate": 3.1052631578947372e-06, "loss": 0.1031, "step": 7050 }, { "epoch": 2.448944271374178, "grad_norm": 2.797724485397339, "learning_rate": 3.078947368421053e-06, "loss": 0.1094, "step": 7075 }, { "epoch": 2.4575977847005883, "grad_norm": 1.8929002285003662, "learning_rate": 3.052631578947369e-06, "loss": 0.1097, "step": 7100 }, { "epoch": 2.466251298026999, "grad_norm": 3.516230583190918, "learning_rate": 3.0263157894736843e-06, "loss": 0.1167, "step": 7125 }, { "epoch": 2.4749048113534093, "grad_norm": 3.7443478107452393, "learning_rate": 3e-06, "loss": 0.1037, "step": 7150 }, { "epoch": 2.48355832467982, "grad_norm": 2.543609142303467, "learning_rate": 2.973684210526316e-06, "loss": 0.1074, "step": 7175 }, { "epoch": 2.4922118380062304, "grad_norm": 3.233546495437622, "learning_rate": 2.9473684210526317e-06, "loss": 0.1097, "step": 7200 }, { "epoch": 2.5008653513326413, "grad_norm": 2.6485321521759033, "learning_rate": 2.921052631578948e-06, "loss": 0.1166, "step": 7225 }, { "epoch": 2.509518864659052, "grad_norm": 2.249458074569702, "learning_rate": 2.8947368421052634e-06, "loss": 0.1122, "step": 7250 }, { "epoch": 2.5181723779854623, "grad_norm": 3.3715906143188477, "learning_rate": 2.868421052631579e-06, "loss": 0.1171, "step": 7275 }, { "epoch": 2.526825891311873, "grad_norm": 2.5565547943115234, "learning_rate": 2.842105263157895e-06, "loss": 0.1081, "step": 7300 }, { "epoch": 2.5354794046382834, "grad_norm": 3.1583316326141357, "learning_rate": 2.815789473684211e-06, "loss": 0.1048, "step": 7325 }, { "epoch": 2.544132917964694, "grad_norm": 3.302534580230713, "learning_rate": 2.789473684210526e-06, "loss": 0.1184, "step": 7350 }, { "epoch": 2.5527864312911044, "grad_norm": 3.553318738937378, "learning_rate": 2.7631578947368424e-06, "loss": 0.1151, "step": 7375 }, { "epoch": 2.561439944617515, "grad_norm": 2.6962010860443115, "learning_rate": 2.7368421052631583e-06, "loss": 0.1201, "step": 7400 }, { "epoch": 2.5700934579439254, "grad_norm": 2.545358657836914, "learning_rate": 2.710526315789474e-06, "loss": 0.1273, "step": 7425 }, { "epoch": 2.578746971270336, "grad_norm": 2.1197948455810547, "learning_rate": 2.68421052631579e-06, "loss": 0.0947, "step": 7450 }, { "epoch": 2.5874004845967464, "grad_norm": 1.732006311416626, "learning_rate": 2.6578947368421053e-06, "loss": 0.1079, "step": 7475 }, { "epoch": 2.596053997923157, "grad_norm": 2.9386560916900635, "learning_rate": 2.631578947368421e-06, "loss": 0.1065, "step": 7500 }, { "epoch": 2.596053997923157, "eval_loss": 0.1983751654624939, "eval_runtime": 8607.5634, "eval_samples_per_second": 1.193, "eval_steps_per_second": 0.075, "eval_wer": 16.488172602027554, "step": 7500 }, { "epoch": 2.6047075112495675, "grad_norm": 3.1988844871520996, "learning_rate": 2.605263157894737e-06, "loss": 0.1065, "step": 7525 }, { "epoch": 2.613361024575978, "grad_norm": 2.8446412086486816, "learning_rate": 2.578947368421053e-06, "loss": 0.1175, "step": 7550 }, { "epoch": 2.6220145379023885, "grad_norm": 3.071406364440918, "learning_rate": 2.552631578947369e-06, "loss": 0.0999, "step": 7575 }, { "epoch": 2.630668051228799, "grad_norm": 2.666354179382324, "learning_rate": 2.5263157894736844e-06, "loss": 0.1036, "step": 7600 }, { "epoch": 2.6393215645552095, "grad_norm": 2.845916271209717, "learning_rate": 2.5e-06, "loss": 0.1033, "step": 7625 }, { "epoch": 2.64797507788162, "grad_norm": 2.97814679145813, "learning_rate": 2.473684210526316e-06, "loss": 0.1025, "step": 7650 }, { "epoch": 2.6566285912080305, "grad_norm": 2.5824403762817383, "learning_rate": 2.447368421052632e-06, "loss": 0.1048, "step": 7675 }, { "epoch": 2.665282104534441, "grad_norm": 2.1139883995056152, "learning_rate": 2.4210526315789477e-06, "loss": 0.1047, "step": 7700 }, { "epoch": 2.6739356178608515, "grad_norm": 2.800978183746338, "learning_rate": 2.3947368421052635e-06, "loss": 0.1184, "step": 7725 }, { "epoch": 2.682589131187262, "grad_norm": 3.0786638259887695, "learning_rate": 2.368421052631579e-06, "loss": 0.1286, "step": 7750 }, { "epoch": 2.6912426445136726, "grad_norm": 2.917689085006714, "learning_rate": 2.342105263157895e-06, "loss": 0.0988, "step": 7775 }, { "epoch": 2.699896157840083, "grad_norm": 2.986503839492798, "learning_rate": 2.3157894736842105e-06, "loss": 0.1074, "step": 7800 }, { "epoch": 2.7085496711664936, "grad_norm": 2.5001847743988037, "learning_rate": 2.2894736842105263e-06, "loss": 0.1058, "step": 7825 }, { "epoch": 2.717203184492904, "grad_norm": 3.5014684200286865, "learning_rate": 2.2631578947368426e-06, "loss": 0.1094, "step": 7850 }, { "epoch": 2.7258566978193146, "grad_norm": 3.0983262062072754, "learning_rate": 2.236842105263158e-06, "loss": 0.1079, "step": 7875 }, { "epoch": 2.734510211145725, "grad_norm": 2.922757625579834, "learning_rate": 2.2105263157894738e-06, "loss": 0.1135, "step": 7900 }, { "epoch": 2.7431637244721356, "grad_norm": 2.354649305343628, "learning_rate": 2.1842105263157896e-06, "loss": 0.1145, "step": 7925 }, { "epoch": 2.751817237798546, "grad_norm": 3.7237930297851562, "learning_rate": 2.1578947368421054e-06, "loss": 0.098, "step": 7950 }, { "epoch": 2.7604707511249567, "grad_norm": 3.361809492111206, "learning_rate": 2.1315789473684212e-06, "loss": 0.1108, "step": 7975 }, { "epoch": 2.769124264451367, "grad_norm": 2.6860949993133545, "learning_rate": 2.105263157894737e-06, "loss": 0.1075, "step": 8000 }, { "epoch": 2.769124264451367, "eval_loss": 0.19574593007564545, "eval_runtime": 8625.7946, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.074, "eval_wer": 16.583484966640672, "step": 8000 }, { "epoch": 2.7777777777777777, "grad_norm": 2.503368616104126, "learning_rate": 2.078947368421053e-06, "loss": 0.0999, "step": 8025 }, { "epoch": 2.786431291104188, "grad_norm": 2.762155055999756, "learning_rate": 2.0526315789473687e-06, "loss": 0.1133, "step": 8050 }, { "epoch": 2.7950848044305987, "grad_norm": 3.162900686264038, "learning_rate": 2.026315789473684e-06, "loss": 0.1208, "step": 8075 }, { "epoch": 2.803738317757009, "grad_norm": 2.3575284481048584, "learning_rate": 2.0000000000000003e-06, "loss": 0.1011, "step": 8100 }, { "epoch": 2.8123918310834197, "grad_norm": 3.4756760597229004, "learning_rate": 1.973684210526316e-06, "loss": 0.1095, "step": 8125 }, { "epoch": 2.8210453444098302, "grad_norm": 2.538372039794922, "learning_rate": 1.9473684210526315e-06, "loss": 0.1069, "step": 8150 }, { "epoch": 2.8296988577362407, "grad_norm": 2.2625138759613037, "learning_rate": 1.9210526315789474e-06, "loss": 0.1065, "step": 8175 }, { "epoch": 2.8383523710626513, "grad_norm": 2.7284586429595947, "learning_rate": 1.8947368421052634e-06, "loss": 0.1105, "step": 8200 }, { "epoch": 2.8470058843890618, "grad_norm": 2.6115376949310303, "learning_rate": 1.868421052631579e-06, "loss": 0.1035, "step": 8225 }, { "epoch": 2.8556593977154723, "grad_norm": 2.6199817657470703, "learning_rate": 1.8421052631578948e-06, "loss": 0.1224, "step": 8250 }, { "epoch": 2.864312911041883, "grad_norm": 3.060654640197754, "learning_rate": 1.8157894736842109e-06, "loss": 0.1027, "step": 8275 }, { "epoch": 2.8729664243682933, "grad_norm": 3.6875500679016113, "learning_rate": 1.7894736842105265e-06, "loss": 0.0934, "step": 8300 }, { "epoch": 2.881619937694704, "grad_norm": 2.232487440109253, "learning_rate": 1.7631578947368423e-06, "loss": 0.0972, "step": 8325 }, { "epoch": 2.8902734510211143, "grad_norm": 3.0473804473876953, "learning_rate": 1.736842105263158e-06, "loss": 0.1013, "step": 8350 }, { "epoch": 2.898926964347525, "grad_norm": 3.049717903137207, "learning_rate": 1.710526315789474e-06, "loss": 0.1024, "step": 8375 }, { "epoch": 2.9075804776739353, "grad_norm": 3.389495849609375, "learning_rate": 1.6842105263157895e-06, "loss": 0.1114, "step": 8400 }, { "epoch": 2.9162339910003463, "grad_norm": 2.7483088970184326, "learning_rate": 1.6578947368421053e-06, "loss": 0.1103, "step": 8425 }, { "epoch": 2.924887504326757, "grad_norm": 2.98256778717041, "learning_rate": 1.6315789473684212e-06, "loss": 0.1131, "step": 8450 }, { "epoch": 2.9335410176531673, "grad_norm": 3.0447702407836914, "learning_rate": 1.605263157894737e-06, "loss": 0.1031, "step": 8475 }, { "epoch": 2.942194530979578, "grad_norm": 2.4080259799957275, "learning_rate": 1.5789473684210526e-06, "loss": 0.0992, "step": 8500 }, { "epoch": 2.942194530979578, "eval_loss": 0.19420863687992096, "eval_runtime": 8629.0493, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 16.375530716575685, "step": 8500 }, { "epoch": 2.9508480443059883, "grad_norm": 2.4957826137542725, "learning_rate": 1.5526315789473686e-06, "loss": 0.0983, "step": 8525 }, { "epoch": 2.959501557632399, "grad_norm": 2.019061326980591, "learning_rate": 1.5263157894736844e-06, "loss": 0.099, "step": 8550 }, { "epoch": 2.9681550709588094, "grad_norm": 3.2875280380249023, "learning_rate": 1.5e-06, "loss": 0.1051, "step": 8575 }, { "epoch": 2.97680858428522, "grad_norm": 2.705897092819214, "learning_rate": 1.4736842105263159e-06, "loss": 0.1033, "step": 8600 }, { "epoch": 2.9854620976116304, "grad_norm": 2.27734375, "learning_rate": 1.4473684210526317e-06, "loss": 0.1075, "step": 8625 }, { "epoch": 2.994115610938041, "grad_norm": 3.100257635116577, "learning_rate": 1.4210526315789475e-06, "loss": 0.11, "step": 8650 }, { "epoch": 3.0027691242644514, "grad_norm": 2.2938201427459717, "learning_rate": 1.394736842105263e-06, "loss": 0.0875, "step": 8675 }, { "epoch": 3.011422637590862, "grad_norm": 1.6862682104110718, "learning_rate": 1.3684210526315791e-06, "loss": 0.0785, "step": 8700 }, { "epoch": 3.0200761509172724, "grad_norm": 2.7323806285858154, "learning_rate": 1.342105263157895e-06, "loss": 0.0795, "step": 8725 }, { "epoch": 3.028729664243683, "grad_norm": 2.4621291160583496, "learning_rate": 1.3157894736842106e-06, "loss": 0.0693, "step": 8750 }, { "epoch": 3.0373831775700935, "grad_norm": 2.2543725967407227, "learning_rate": 1.2894736842105266e-06, "loss": 0.0707, "step": 8775 }, { "epoch": 3.046036690896504, "grad_norm": 2.0178897380828857, "learning_rate": 1.2631578947368422e-06, "loss": 0.0787, "step": 8800 }, { "epoch": 3.0546902042229145, "grad_norm": 1.9907864332199097, "learning_rate": 1.236842105263158e-06, "loss": 0.075, "step": 8825 }, { "epoch": 3.063343717549325, "grad_norm": 2.3367834091186523, "learning_rate": 1.2105263157894738e-06, "loss": 0.0789, "step": 8850 }, { "epoch": 3.0719972308757355, "grad_norm": 2.4846036434173584, "learning_rate": 1.1842105263157894e-06, "loss": 0.0722, "step": 8875 }, { "epoch": 3.080650744202146, "grad_norm": 2.7100768089294434, "learning_rate": 1.1578947368421053e-06, "loss": 0.0724, "step": 8900 }, { "epoch": 3.0893042575285565, "grad_norm": 2.0488345623016357, "learning_rate": 1.1315789473684213e-06, "loss": 0.0818, "step": 8925 }, { "epoch": 3.097957770854967, "grad_norm": 2.2149784564971924, "learning_rate": 1.1052631578947369e-06, "loss": 0.0753, "step": 8950 }, { "epoch": 3.1066112841813776, "grad_norm": 1.7441498041152954, "learning_rate": 1.0789473684210527e-06, "loss": 0.0658, "step": 8975 }, { "epoch": 3.115264797507788, "grad_norm": 2.315944194793701, "learning_rate": 1.0526315789473685e-06, "loss": 0.0687, "step": 9000 }, { "epoch": 3.115264797507788, "eval_loss": 0.2007349729537964, "eval_runtime": 8593.9573, "eval_samples_per_second": 1.195, "eval_steps_per_second": 0.075, "eval_wer": 16.449181180140368, "step": 9000 }, { "epoch": 3.1239183108341986, "grad_norm": 2.1374213695526123, "learning_rate": 1.0263157894736843e-06, "loss": 0.0678, "step": 9025 }, { "epoch": 3.132571824160609, "grad_norm": 2.6714038848876953, "learning_rate": 1.0000000000000002e-06, "loss": 0.0726, "step": 9050 }, { "epoch": 3.1412253374870196, "grad_norm": 2.326164484024048, "learning_rate": 9.736842105263158e-07, "loss": 0.0737, "step": 9075 }, { "epoch": 3.14987885081343, "grad_norm": 1.5465072393417358, "learning_rate": 9.473684210526317e-07, "loss": 0.0699, "step": 9100 }, { "epoch": 3.1585323641398406, "grad_norm": 1.9387298822402954, "learning_rate": 9.210526315789474e-07, "loss": 0.0707, "step": 9125 }, { "epoch": 3.167185877466251, "grad_norm": 2.333085775375366, "learning_rate": 8.947368421052632e-07, "loss": 0.0679, "step": 9150 }, { "epoch": 3.1758393907926616, "grad_norm": 1.9540473222732544, "learning_rate": 8.68421052631579e-07, "loss": 0.0683, "step": 9175 }, { "epoch": 3.184492904119072, "grad_norm": 2.5576722621917725, "learning_rate": 8.421052631578948e-07, "loss": 0.0719, "step": 9200 }, { "epoch": 3.1931464174454827, "grad_norm": 2.0068089962005615, "learning_rate": 8.157894736842106e-07, "loss": 0.0853, "step": 9225 }, { "epoch": 3.2017999307718936, "grad_norm": 2.2162768840789795, "learning_rate": 7.894736842105263e-07, "loss": 0.0683, "step": 9250 }, { "epoch": 3.210453444098304, "grad_norm": 1.776559829711914, "learning_rate": 7.631578947368422e-07, "loss": 0.0798, "step": 9275 }, { "epoch": 3.2191069574247146, "grad_norm": 1.4732505083084106, "learning_rate": 7.368421052631579e-07, "loss": 0.0726, "step": 9300 }, { "epoch": 3.227760470751125, "grad_norm": 2.921454906463623, "learning_rate": 7.105263157894737e-07, "loss": 0.0717, "step": 9325 }, { "epoch": 3.2364139840775357, "grad_norm": 2.061314344406128, "learning_rate": 6.842105263157896e-07, "loss": 0.0694, "step": 9350 }, { "epoch": 3.245067497403946, "grad_norm": 2.4505109786987305, "learning_rate": 6.578947368421053e-07, "loss": 0.0718, "step": 9375 }, { "epoch": 3.2537210107303567, "grad_norm": 2.636258840560913, "learning_rate": 6.315789473684211e-07, "loss": 0.0714, "step": 9400 }, { "epoch": 3.262374524056767, "grad_norm": 2.4016501903533936, "learning_rate": 6.052631578947369e-07, "loss": 0.0821, "step": 9425 }, { "epoch": 3.2710280373831777, "grad_norm": 2.0783393383026123, "learning_rate": 5.789473684210526e-07, "loss": 0.0748, "step": 9450 }, { "epoch": 3.2796815507095882, "grad_norm": 3.0884315967559814, "learning_rate": 5.526315789473684e-07, "loss": 0.0833, "step": 9475 }, { "epoch": 3.2883350640359987, "grad_norm": 2.3851513862609863, "learning_rate": 5.263157894736843e-07, "loss": 0.0722, "step": 9500 }, { "epoch": 3.2883350640359987, "eval_loss": 0.20027859508991241, "eval_runtime": 8622.3221, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.074, "eval_wer": 16.265488259249633, "step": 9500 }, { "epoch": 3.2969885773624092, "grad_norm": 2.619279146194458, "learning_rate": 5.000000000000001e-07, "loss": 0.0707, "step": 9525 }, { "epoch": 3.3056420906888198, "grad_norm": 1.975462794303894, "learning_rate": 4.7368421052631585e-07, "loss": 0.0696, "step": 9550 }, { "epoch": 3.3142956040152303, "grad_norm": 2.281332015991211, "learning_rate": 4.473684210526316e-07, "loss": 0.0698, "step": 9575 }, { "epoch": 3.322949117341641, "grad_norm": 2.048888683319092, "learning_rate": 4.210526315789474e-07, "loss": 0.0712, "step": 9600 }, { "epoch": 3.3316026306680513, "grad_norm": 2.216397762298584, "learning_rate": 3.9473684210526315e-07, "loss": 0.0756, "step": 9625 }, { "epoch": 3.340256143994462, "grad_norm": 3.0520379543304443, "learning_rate": 3.6842105263157896e-07, "loss": 0.0682, "step": 9650 }, { "epoch": 3.3489096573208723, "grad_norm": 3.0853352546691895, "learning_rate": 3.421052631578948e-07, "loss": 0.0803, "step": 9675 }, { "epoch": 3.357563170647283, "grad_norm": 2.6923489570617676, "learning_rate": 3.1578947368421055e-07, "loss": 0.0699, "step": 9700 }, { "epoch": 3.3662166839736933, "grad_norm": 1.5350950956344604, "learning_rate": 2.894736842105263e-07, "loss": 0.0641, "step": 9725 }, { "epoch": 3.374870197300104, "grad_norm": 1.8158336877822876, "learning_rate": 2.6315789473684213e-07, "loss": 0.0742, "step": 9750 }, { "epoch": 3.3835237106265144, "grad_norm": 2.268543243408203, "learning_rate": 2.3684210526315792e-07, "loss": 0.0812, "step": 9775 }, { "epoch": 3.392177223952925, "grad_norm": 2.02999210357666, "learning_rate": 2.105263157894737e-07, "loss": 0.0745, "step": 9800 }, { "epoch": 3.4008307372793354, "grad_norm": 2.2966854572296143, "learning_rate": 1.8421052631578948e-07, "loss": 0.0685, "step": 9825 }, { "epoch": 3.409484250605746, "grad_norm": 2.4790639877319336, "learning_rate": 1.5789473684210527e-07, "loss": 0.0695, "step": 9850 }, { "epoch": 3.4181377639321564, "grad_norm": 2.1657919883728027, "learning_rate": 1.3157894736842107e-07, "loss": 0.0742, "step": 9875 }, { "epoch": 3.426791277258567, "grad_norm": 1.6919013261795044, "learning_rate": 1.0526315789473685e-07, "loss": 0.0641, "step": 9900 }, { "epoch": 3.4354447905849774, "grad_norm": 2.441950798034668, "learning_rate": 7.894736842105264e-08, "loss": 0.0701, "step": 9925 }, { "epoch": 3.444098303911388, "grad_norm": 1.9817427396774292, "learning_rate": 5.263157894736842e-08, "loss": 0.0677, "step": 9950 }, { "epoch": 3.4527518172377984, "grad_norm": 1.978274941444397, "learning_rate": 2.631578947368421e-08, "loss": 0.073, "step": 9975 }, { "epoch": 3.461405330564209, "grad_norm": 2.204577684402466, "learning_rate": 0.0, "loss": 0.0713, "step": 10000 }, { "epoch": 3.461405330564209, "eval_loss": 0.19991621375083923, "eval_runtime": 8599.1571, "eval_samples_per_second": 1.195, "eval_steps_per_second": 0.075, "eval_wer": 16.21523264881726, "step": 10000 } ], "logging_steps": 25, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.455843688448e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }