{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9357336430507162, "eval_steps": 100, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, "eval_loss": 3.547485828399658, "eval_runtime": 144.8623, "eval_samples_per_second": 39.044, "eval_steps_per_second": 4.88, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, "eval_loss": 3.0259251594543457, "eval_runtime": 142.9174, "eval_samples_per_second": 39.575, "eval_steps_per_second": 4.947, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, "eval_loss": 3.0886833667755127, "eval_runtime": 141.7177, "eval_samples_per_second": 39.91, "eval_steps_per_second": 4.989, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, "eval_loss": 2.3821566104888916, "eval_runtime": 143.4279, "eval_samples_per_second": 39.434, "eval_steps_per_second": 4.929, "eval_wer": 0.9971915071175234, "step": 400 }, { "epoch": 0.09678668215253582, "grad_norm": 3.033390760421753, "learning_rate": 0.0002982, "loss": 4.0938, "step": 500 }, { "epoch": 0.09678668215253582, "eval_loss": 1.4546788930892944, "eval_runtime": 142.7727, "eval_samples_per_second": 39.615, "eval_steps_per_second": 4.952, "eval_wer": 0.9020076711977019, "step": 500 }, { "epoch": 0.11614401858304298, "eval_loss": 1.2602813243865967, "eval_runtime": 143.0991, "eval_samples_per_second": 39.525, "eval_steps_per_second": 4.941, "eval_wer": 0.8509733433904126, "step": 600 }, { "epoch": 0.13550135501355012, "eval_loss": 1.0939536094665527, "eval_runtime": 145.4158, "eval_samples_per_second": 38.895, "eval_steps_per_second": 4.862, "eval_wer": 0.7654667715170677, "step": 700 }, { "epoch": 0.1548586914440573, "eval_loss": 1.0704576969146729, "eval_runtime": 148.1277, "eval_samples_per_second": 38.183, "eval_steps_per_second": 4.773, "eval_wer": 0.7601547078364975, "step": 800 }, { "epoch": 0.17421602787456447, "eval_loss": 0.9356458187103271, "eval_runtime": 143.6296, "eval_samples_per_second": 39.379, "eval_steps_per_second": 4.922, "eval_wer": 0.6972926128612925, "step": 900 }, { "epoch": 0.19357336430507163, "grad_norm": 3.2989861965179443, "learning_rate": 0.0002843684210526315, "loss": 1.0597, "step": 1000 }, { "epoch": 0.19357336430507163, "eval_loss": 0.9103516936302185, "eval_runtime": 146.7237, "eval_samples_per_second": 38.549, "eval_steps_per_second": 4.819, "eval_wer": 0.6765579111232367, "step": 1000 }, { "epoch": 0.2129307007355788, "eval_loss": 0.8879104256629944, "eval_runtime": 153.6385, "eval_samples_per_second": 36.814, "eval_steps_per_second": 4.602, "eval_wer": 0.6569947521304424, "step": 1100 }, { "epoch": 0.23228803716608595, "eval_loss": 0.8594633936882019, "eval_runtime": 147.9212, "eval_samples_per_second": 38.237, "eval_steps_per_second": 4.78, "eval_wer": 0.6611834186580219, "step": 1200 }, { "epoch": 0.2516453735965931, "eval_loss": 0.8351845145225525, "eval_runtime": 148.1861, "eval_samples_per_second": 38.168, "eval_steps_per_second": 4.771, "eval_wer": 0.6075331803373402, "step": 1300 }, { "epoch": 0.27100271002710025, "eval_loss": 0.791232168674469, "eval_runtime": 148.9749, "eval_samples_per_second": 37.966, "eval_steps_per_second": 4.746, "eval_wer": 0.6033124167482466, "step": 1400 }, { "epoch": 0.29036004645760743, "grad_norm": 6.474522113800049, "learning_rate": 0.000268578947368421, "loss": 0.8484, "step": 1500 }, { "epoch": 0.29036004645760743, "eval_loss": 0.7862286567687988, "eval_runtime": 146.7521, "eval_samples_per_second": 38.541, "eval_steps_per_second": 4.818, "eval_wer": 0.6067468023302467, "step": 1500 }, { "epoch": 0.3097173828881146, "eval_loss": 0.7790109515190125, "eval_runtime": 147.7062, "eval_samples_per_second": 38.292, "eval_steps_per_second": 4.787, "eval_wer": 0.6009051371346953, "step": 1600 }, { "epoch": 0.32907471931862176, "eval_loss": 0.7678210735321045, "eval_runtime": 148.3951, "eval_samples_per_second": 38.114, "eval_steps_per_second": 4.764, "eval_wer": 0.5629182648328546, "step": 1700 }, { "epoch": 0.34843205574912894, "eval_loss": 0.7514644861221313, "eval_runtime": 149.2674, "eval_samples_per_second": 37.892, "eval_steps_per_second": 4.736, "eval_wer": 0.5798655133122562, "step": 1800 }, { "epoch": 0.3677893921796361, "eval_loss": 0.7423551678657532, "eval_runtime": 149.3427, "eval_samples_per_second": 37.873, "eval_steps_per_second": 4.734, "eval_wer": 0.5859158094076488, "step": 1900 }, { "epoch": 0.38714672861014326, "grad_norm": 2.573913335800171, "learning_rate": 0.0002527894736842105, "loss": 0.764, "step": 2000 }, { "epoch": 0.38714672861014326, "eval_loss": 0.7129915356636047, "eval_runtime": 148.3711, "eval_samples_per_second": 38.121, "eval_steps_per_second": 4.765, "eval_wer": 0.5520855065718734, "step": 2000 }, { "epoch": 0.4065040650406504, "eval_loss": 0.7114368677139282, "eval_runtime": 148.2007, "eval_samples_per_second": 38.164, "eval_steps_per_second": 4.771, "eval_wer": 0.5407712923881819, "step": 2100 }, { "epoch": 0.4258614014711576, "eval_loss": 0.7228682637214661, "eval_runtime": 149.1432, "eval_samples_per_second": 37.923, "eval_steps_per_second": 4.74, "eval_wer": 0.5577024923368266, "step": 2200 }, { "epoch": 0.4452187379016647, "eval_loss": 0.6773180961608887, "eval_runtime": 154.221, "eval_samples_per_second": 36.675, "eval_steps_per_second": 4.584, "eval_wer": 0.5160084094301167, "step": 2300 }, { "epoch": 0.4645760743321719, "eval_loss": 0.6784498691558838, "eval_runtime": 149.2744, "eval_samples_per_second": 37.89, "eval_steps_per_second": 4.736, "eval_wer": 0.5177897963441447, "step": 2400 }, { "epoch": 0.48393341076267904, "grad_norm": 3.0869553089141846, "learning_rate": 0.000237, "loss": 0.6868, "step": 2500 }, { "epoch": 0.48393341076267904, "eval_loss": 0.672030508518219, "eval_runtime": 149.4453, "eval_samples_per_second": 37.847, "eval_steps_per_second": 4.731, "eval_wer": 0.5261831779300605, "step": 2500 }, { "epoch": 0.5032907471931862, "eval_loss": 0.6804332137107849, "eval_runtime": 151.0327, "eval_samples_per_second": 37.449, "eval_steps_per_second": 4.681, "eval_wer": 0.5336617932628268, "step": 2600 }, { "epoch": 0.5226480836236934, "eval_loss": 0.6598911285400391, "eval_runtime": 149.0299, "eval_samples_per_second": 37.952, "eval_steps_per_second": 4.744, "eval_wer": 0.5023832068174159, "step": 2700 }, { "epoch": 0.5420054200542005, "eval_loss": 0.6287100911140442, "eval_runtime": 149.9845, "eval_samples_per_second": 37.711, "eval_steps_per_second": 4.714, "eval_wer": 0.4902023719728459, "step": 2800 }, { "epoch": 0.5613627564847077, "eval_loss": 0.6304338574409485, "eval_runtime": 150.016, "eval_samples_per_second": 37.703, "eval_steps_per_second": 4.713, "eval_wer": 0.49471200911556545, "step": 2900 }, { "epoch": 0.5807200929152149, "grad_norm": 5.678714275360107, "learning_rate": 0.00022121052631578946, "loss": 0.6761, "step": 3000 }, { "epoch": 0.5807200929152149, "eval_loss": 0.6258472204208374, "eval_runtime": 149.8088, "eval_samples_per_second": 37.755, "eval_steps_per_second": 4.719, "eval_wer": 0.48513103625363097, "step": 3000 }, { "epoch": 0.6000774293457221, "eval_loss": 0.6310975551605225, "eval_runtime": 148.9286, "eval_samples_per_second": 37.978, "eval_steps_per_second": 4.747, "eval_wer": 0.4989809182969299, "step": 3100 }, { "epoch": 0.6194347657762292, "eval_loss": 0.6171565651893616, "eval_runtime": 148.6924, "eval_samples_per_second": 38.038, "eval_steps_per_second": 4.755, "eval_wer": 0.4901060807883038, "step": 3200 }, { "epoch": 0.6387921022067363, "eval_loss": 0.6187321543693542, "eval_runtime": 149.7679, "eval_samples_per_second": 37.765, "eval_steps_per_second": 4.721, "eval_wer": 0.46661103176004237, "step": 3300 }, { "epoch": 0.6581494386372435, "eval_loss": 0.6044796109199524, "eval_runtime": 149.5983, "eval_samples_per_second": 37.808, "eval_steps_per_second": 4.726, "eval_wer": 0.4725489881401358, "step": 3400 }, { "epoch": 0.6775067750677507, "grad_norm": 4.122500419616699, "learning_rate": 0.00020542105263157893, "loss": 0.6462, "step": 3500 }, { "epoch": 0.6775067750677507, "eval_loss": 0.5950499773025513, "eval_runtime": 148.2511, "eval_samples_per_second": 38.151, "eval_steps_per_second": 4.769, "eval_wer": 0.4716823674792573, "step": 3500 }, { "epoch": 0.6968641114982579, "eval_loss": 0.5902624726295471, "eval_runtime": 149.3094, "eval_samples_per_second": 37.881, "eval_steps_per_second": 4.735, "eval_wer": 0.4602237165187527, "step": 3600 }, { "epoch": 0.716221447928765, "eval_loss": 0.5864866375923157, "eval_runtime": 149.6434, "eval_samples_per_second": 37.797, "eval_steps_per_second": 4.725, "eval_wer": 0.47267737638619184, "step": 3700 }, { "epoch": 0.7355787843592722, "eval_loss": 0.5820363759994507, "eval_runtime": 148.886, "eval_samples_per_second": 37.989, "eval_steps_per_second": 4.749, "eval_wer": 0.459036125242734, "step": 3800 }, { "epoch": 0.7549361207897793, "eval_loss": 0.6025602221488953, "eval_runtime": 148.9627, "eval_samples_per_second": 37.969, "eval_steps_per_second": 4.746, "eval_wer": 0.48296448460143476, "step": 3900 }, { "epoch": 0.7742934572202865, "grad_norm": 5.146019458770752, "learning_rate": 0.0001896315789473684, "loss": 0.6193, "step": 4000 }, { "epoch": 0.7742934572202865, "eval_loss": 0.5807139277458191, "eval_runtime": 147.9966, "eval_samples_per_second": 38.217, "eval_steps_per_second": 4.777, "eval_wer": 0.44963168621912664, "step": 4000 }, { "epoch": 0.7936507936507936, "eval_loss": 0.5620962977409363, "eval_runtime": 148.8391, "eval_samples_per_second": 38.001, "eval_steps_per_second": 4.75, "eval_wer": 0.44857248318916404, "step": 4100 }, { "epoch": 0.8130081300813008, "eval_loss": 0.5730157494544983, "eval_runtime": 148.9808, "eval_samples_per_second": 37.965, "eval_steps_per_second": 4.746, "eval_wer": 0.4593410473271172, "step": 4200 }, { "epoch": 0.832365466511808, "eval_loss": 0.5592055916786194, "eval_runtime": 147.8897, "eval_samples_per_second": 38.245, "eval_steps_per_second": 4.781, "eval_wer": 0.43741875431304267, "step": 4300 }, { "epoch": 0.8517228029423152, "eval_loss": 0.5621338486671448, "eval_runtime": 148.7799, "eval_samples_per_second": 38.016, "eval_steps_per_second": 4.752, "eval_wer": 0.42387379435412686, "step": 4400 }, { "epoch": 0.8710801393728222, "grad_norm": 2.8218295574188232, "learning_rate": 0.0001738421052631579, "loss": 0.59, "step": 4500 }, { "epoch": 0.8710801393728222, "eval_loss": 0.545798659324646, "eval_runtime": 150.2397, "eval_samples_per_second": 37.647, "eval_steps_per_second": 4.706, "eval_wer": 0.4304055463722296, "step": 4500 }, { "epoch": 0.8904374758033294, "eval_loss": 0.5406409502029419, "eval_runtime": 148.931, "eval_samples_per_second": 37.977, "eval_steps_per_second": 4.747, "eval_wer": 0.4270674519747717, "step": 4600 }, { "epoch": 0.9097948122338366, "eval_loss": 0.5268651247024536, "eval_runtime": 148.7725, "eval_samples_per_second": 38.018, "eval_steps_per_second": 4.752, "eval_wer": 0.41315337580844474, "step": 4700 }, { "epoch": 0.9291521486643438, "eval_loss": 0.5362106561660767, "eval_runtime": 147.8165, "eval_samples_per_second": 38.264, "eval_steps_per_second": 4.783, "eval_wer": 0.4214665147405755, "step": 4800 }, { "epoch": 0.948509485094851, "eval_loss": 0.5226009488105774, "eval_runtime": 149.4387, "eval_samples_per_second": 37.848, "eval_steps_per_second": 4.731, "eval_wer": 0.41626679077530454, "step": 4900 }, { "epoch": 0.9678668215253581, "grad_norm": 7.241621017456055, "learning_rate": 0.00015808421052631577, "loss": 0.5636, "step": 5000 }, { "epoch": 0.9678668215253581, "eval_loss": 0.5297274589538574, "eval_runtime": 149.3726, "eval_samples_per_second": 37.865, "eval_steps_per_second": 4.733, "eval_wer": 0.4148384715379307, "step": 5000 }, { "epoch": 0.9872241579558653, "eval_loss": 0.5225785970687866, "eval_runtime": 149.3039, "eval_samples_per_second": 37.882, "eval_steps_per_second": 4.735, "eval_wer": 0.413634831731155, "step": 5100 }, { "epoch": 1.0065814943863725, "eval_loss": 0.5239331722259521, "eval_runtime": 149.3008, "eval_samples_per_second": 37.883, "eval_steps_per_second": 4.735, "eval_wer": 0.4054179839835663, "step": 5200 }, { "epoch": 1.0259388308168795, "eval_loss": 0.5382751226425171, "eval_runtime": 148.0837, "eval_samples_per_second": 38.195, "eval_steps_per_second": 4.774, "eval_wer": 0.4057871001909775, "step": 5300 }, { "epoch": 1.0452961672473868, "eval_loss": 0.5125272274017334, "eval_runtime": 149.1205, "eval_samples_per_second": 37.929, "eval_steps_per_second": 4.741, "eval_wer": 0.4067179149748841, "step": 5400 }, { "epoch": 1.064653503677894, "grad_norm": 1.124423623085022, "learning_rate": 0.00014232631578947366, "loss": 0.4924, "step": 5500 }, { "epoch": 1.064653503677894, "eval_loss": 0.5029215812683105, "eval_runtime": 147.7988, "eval_samples_per_second": 38.268, "eval_steps_per_second": 4.784, "eval_wer": 0.39533950666816453, "step": 5500 }, { "epoch": 1.084010840108401, "eval_loss": 0.505442202091217, "eval_runtime": 149.3815, "eval_samples_per_second": 37.863, "eval_steps_per_second": 4.733, "eval_wer": 0.3932050520774823, "step": 5600 }, { "epoch": 1.1033681765389083, "eval_loss": 0.4968744218349457, "eval_runtime": 149.7956, "eval_samples_per_second": 37.758, "eval_steps_per_second": 4.72, "eval_wer": 0.3894015502880711, "step": 5700 }, { "epoch": 1.1227255129694154, "eval_loss": 0.49354633688926697, "eval_runtime": 148.4196, "eval_samples_per_second": 38.108, "eval_steps_per_second": 4.764, "eval_wer": 0.38508449551443563, "step": 5800 }, { "epoch": 1.1420828493999227, "eval_loss": 0.49766939878463745, "eval_runtime": 149.5302, "eval_samples_per_second": 37.825, "eval_steps_per_second": 4.728, "eval_wer": 0.3816501099324357, "step": 5900 }, { "epoch": 1.1614401858304297, "grad_norm": 1.9677255153656006, "learning_rate": 0.00012653684210526316, "loss": 0.4602, "step": 6000 }, { "epoch": 1.1614401858304297, "eval_loss": 0.4862758219242096, "eval_runtime": 150.9135, "eval_samples_per_second": 37.478, "eval_steps_per_second": 4.685, "eval_wer": 0.387395483943445, "step": 6000 }, { "epoch": 1.1807975222609368, "eval_loss": 0.4906172454357147, "eval_runtime": 148.5353, "eval_samples_per_second": 38.078, "eval_steps_per_second": 4.76, "eval_wer": 0.3776700743046974, "step": 6100 }, { "epoch": 1.2001548586914441, "eval_loss": 0.4891129434108734, "eval_runtime": 149.6289, "eval_samples_per_second": 37.8, "eval_steps_per_second": 4.725, "eval_wer": 0.3763861918441367, "step": 6200 }, { "epoch": 1.2195121951219512, "eval_loss": 0.488125741481781, "eval_runtime": 148.288, "eval_samples_per_second": 38.142, "eval_steps_per_second": 4.768, "eval_wer": 0.3800934024490058, "step": 6300 }, { "epoch": 1.2388695315524583, "eval_loss": 0.48135778307914734, "eval_runtime": 147.6048, "eval_samples_per_second": 38.319, "eval_steps_per_second": 4.79, "eval_wer": 0.37266293270851053, "step": 6400 }, { "epoch": 1.2582268679829656, "grad_norm": 1.2907174825668335, "learning_rate": 0.00011074736842105263, "loss": 0.4407, "step": 6500 }, { "epoch": 1.2582268679829656, "eval_loss": 0.47142112255096436, "eval_runtime": 147.9064, "eval_samples_per_second": 38.24, "eval_steps_per_second": 4.78, "eval_wer": 0.37723676397425815, "step": 6500 }, { "epoch": 1.2775842044134726, "eval_loss": 0.47389352321624756, "eval_runtime": 146.8545, "eval_samples_per_second": 38.514, "eval_steps_per_second": 4.814, "eval_wer": 0.3705605751793423, "step": 6600 }, { "epoch": 1.29694154084398, "eval_loss": 0.4691925644874573, "eval_runtime": 146.6568, "eval_samples_per_second": 38.566, "eval_steps_per_second": 4.821, "eval_wer": 0.3713790502479498, "step": 6700 }, { "epoch": 1.316298877274487, "eval_loss": 0.4672953486442566, "eval_runtime": 146.8165, "eval_samples_per_second": 38.524, "eval_steps_per_second": 4.816, "eval_wer": 0.3728073694853236, "step": 6800 }, { "epoch": 1.3356562137049943, "eval_loss": 0.46098417043685913, "eval_runtime": 147.3051, "eval_samples_per_second": 38.397, "eval_steps_per_second": 4.8, "eval_wer": 0.36780022788913674, "step": 6900 }, { "epoch": 1.3550135501355014, "grad_norm": 0.8472552299499512, "learning_rate": 9.49578947368421e-05, "loss": 0.4284, "step": 7000 }, { "epoch": 1.3550135501355014, "eval_loss": 0.47299668192863464, "eval_runtime": 151.323, "eval_samples_per_second": 37.377, "eval_steps_per_second": 4.672, "eval_wer": 0.36531270562180035, "step": 7000 }, { "epoch": 1.3743708865660085, "eval_loss": 0.46056076884269714, "eval_runtime": 146.0139, "eval_samples_per_second": 38.736, "eval_steps_per_second": 4.842, "eval_wer": 0.36399672609972555, "step": 7100 }, { "epoch": 1.3937282229965158, "eval_loss": 0.4571812152862549, "eval_runtime": 146.7792, "eval_samples_per_second": 38.534, "eval_steps_per_second": 4.817, "eval_wer": 0.3620067082858564, "step": 7200 }, { "epoch": 1.4130855594270229, "eval_loss": 0.45746785402297974, "eval_runtime": 146.8097, "eval_samples_per_second": 38.526, "eval_steps_per_second": 4.816, "eval_wer": 0.362969620131277, "step": 7300 }, { "epoch": 1.43244289585753, "eval_loss": 0.45778077840805054, "eval_runtime": 146.9433, "eval_samples_per_second": 38.491, "eval_steps_per_second": 4.811, "eval_wer": 0.3590216815650527, "step": 7400 }, { "epoch": 1.4518002322880372, "grad_norm": 0.9635696411132812, "learning_rate": 7.916842105263156e-05, "loss": 0.4299, "step": 7500 }, { "epoch": 1.4518002322880372, "eval_loss": 0.4477390646934509, "eval_runtime": 146.7454, "eval_samples_per_second": 38.543, "eval_steps_per_second": 4.818, "eval_wer": 0.3569193240358845, "step": 7500 }, { "epoch": 1.4711575687185443, "eval_loss": 0.4441732168197632, "eval_runtime": 147.4263, "eval_samples_per_second": 38.365, "eval_steps_per_second": 4.796, "eval_wer": 0.3551700341833705, "step": 7600 }, { "epoch": 1.4905149051490514, "eval_loss": 0.4420062303543091, "eval_runtime": 146.725, "eval_samples_per_second": 38.548, "eval_steps_per_second": 4.819, "eval_wer": 0.3546083356068752, "step": 7700 }, { "epoch": 1.5098722415795587, "eval_loss": 0.4436999559402466, "eval_runtime": 145.7818, "eval_samples_per_second": 38.798, "eval_steps_per_second": 4.85, "eval_wer": 0.3482531174270995, "step": 7800 }, { "epoch": 1.5292295780100658, "eval_loss": 0.43728071451187134, "eval_runtime": 146.721, "eval_samples_per_second": 38.549, "eval_steps_per_second": 4.819, "eval_wer": 0.3485740880422397, "step": 7900 }, { "epoch": 1.5485869144405728, "grad_norm": 1.1358890533447266, "learning_rate": 6.341052631578946e-05, "loss": 0.408, "step": 8000 }, { "epoch": 1.5485869144405728, "eval_loss": 0.4335756301879883, "eval_runtime": 146.7599, "eval_samples_per_second": 38.539, "eval_steps_per_second": 4.817, "eval_wer": 0.3464075363900435, "step": 8000 }, { "epoch": 1.5679442508710801, "eval_loss": 0.4347936511039734, "eval_runtime": 146.7423, "eval_samples_per_second": 38.544, "eval_steps_per_second": 4.818, "eval_wer": 0.34475453772207154, "step": 8100 }, { "epoch": 1.5873015873015874, "eval_loss": 0.42762240767478943, "eval_runtime": 151.2432, "eval_samples_per_second": 37.397, "eval_steps_per_second": 4.675, "eval_wer": 0.34180160806278187, "step": 8200 }, { "epoch": 1.6066589237320945, "eval_loss": 0.42939648032188416, "eval_runtime": 146.0228, "eval_samples_per_second": 38.734, "eval_steps_per_second": 4.842, "eval_wer": 0.3399078814334548, "step": 8300 }, { "epoch": 1.6260162601626016, "eval_loss": 0.42716294527053833, "eval_runtime": 145.901, "eval_samples_per_second": 38.766, "eval_steps_per_second": 4.846, "eval_wer": 0.3387523872189501, "step": 8400 }, { "epoch": 1.645373596593109, "grad_norm": 1.037522792816162, "learning_rate": 4.762105263157894e-05, "loss": 0.3964, "step": 8500 }, { "epoch": 1.645373596593109, "eval_loss": 0.4310940206050873, "eval_runtime": 145.7507, "eval_samples_per_second": 38.806, "eval_steps_per_second": 4.851, "eval_wer": 0.3408707932788753, "step": 8500 }, { "epoch": 1.664730933023616, "eval_loss": 0.4260464608669281, "eval_runtime": 146.3966, "eval_samples_per_second": 38.635, "eval_steps_per_second": 4.829, "eval_wer": 0.3381264945194267, "step": 8600 }, { "epoch": 1.684088269454123, "eval_loss": 0.4260489046573639, "eval_runtime": 146.6331, "eval_samples_per_second": 38.572, "eval_steps_per_second": 4.822, "eval_wer": 0.3370672914894641, "step": 8700 }, { "epoch": 1.7034456058846303, "eval_loss": 0.4259546101093292, "eval_runtime": 146.1762, "eval_samples_per_second": 38.693, "eval_steps_per_second": 4.837, "eval_wer": 0.33636115613615575, "step": 8800 }, { "epoch": 1.7228029423151374, "eval_loss": 0.42149877548217773, "eval_runtime": 147.5316, "eval_samples_per_second": 38.338, "eval_steps_per_second": 4.792, "eval_wer": 0.335109370737109, "step": 8900 }, { "epoch": 1.7421602787456445, "grad_norm": 1.4968059062957764, "learning_rate": 3.186315789473684e-05, "loss": 0.3866, "step": 9000 }, { "epoch": 1.7421602787456445, "eval_loss": 0.4234353303909302, "eval_runtime": 146.8779, "eval_samples_per_second": 38.508, "eval_steps_per_second": 4.814, "eval_wer": 0.3330391102694548, "step": 9000 }, { "epoch": 1.7615176151761518, "eval_loss": 0.4210032522678375, "eval_runtime": 146.0169, "eval_samples_per_second": 38.735, "eval_steps_per_second": 4.842, "eval_wer": 0.3318515189934362, "step": 9100 }, { "epoch": 1.7808749516066589, "eval_loss": 0.41560646891593933, "eval_runtime": 145.9957, "eval_samples_per_second": 38.741, "eval_steps_per_second": 4.843, "eval_wer": 0.3300540835486511, "step": 9200 }, { "epoch": 1.800232288037166, "eval_loss": 0.41584905982017517, "eval_runtime": 147.0182, "eval_samples_per_second": 38.471, "eval_steps_per_second": 4.809, "eval_wer": 0.33032690857152025, "step": 9300 }, { "epoch": 1.8195896244676733, "eval_loss": 0.41545388102531433, "eval_runtime": 147.1819, "eval_samples_per_second": 38.429, "eval_steps_per_second": 4.804, "eval_wer": 0.32944423937988476, "step": 9400 }, { "epoch": 1.8389469608981805, "grad_norm": 0.9967782497406006, "learning_rate": 1.6073684210526313e-05, "loss": 0.37, "step": 9500 }, { "epoch": 1.8389469608981805, "eval_loss": 0.41372692584991455, "eval_runtime": 146.2893, "eval_samples_per_second": 38.663, "eval_steps_per_second": 4.833, "eval_wer": 0.32921955994928664, "step": 9500 }, { "epoch": 1.8583042973286876, "eval_loss": 0.4120025932788849, "eval_runtime": 146.1391, "eval_samples_per_second": 38.703, "eval_steps_per_second": 4.838, "eval_wer": 0.3284492304729502, "step": 9600 }, { "epoch": 1.8776616337591947, "eval_loss": 0.4108966886997223, "eval_runtime": 146.9334, "eval_samples_per_second": 38.494, "eval_steps_per_second": 4.812, "eval_wer": 0.3300701320794081, "step": 9700 }, { "epoch": 1.897018970189702, "eval_loss": 0.4100329577922821, "eval_runtime": 146.8452, "eval_samples_per_second": 38.517, "eval_steps_per_second": 4.815, "eval_wer": 0.32785543483494084, "step": 9800 }, { "epoch": 1.916376306620209, "eval_loss": 0.4094770848751068, "eval_runtime": 146.5252, "eval_samples_per_second": 38.601, "eval_steps_per_second": 4.825, "eval_wer": 0.3266999406204362, "step": 9900 }, { "epoch": 1.9357336430507162, "grad_norm": 0.7779282927513123, "learning_rate": 2.842105263157894e-07, "loss": 0.371, "step": 10000 }, { "epoch": 1.9357336430507162, "eval_loss": 0.409473717212677, "eval_runtime": 146.0247, "eval_samples_per_second": 38.733, "eval_steps_per_second": 4.842, "eval_wer": 0.3270690568278474, "step": 10000 }, { "epoch": 1.9357336430507162, "step": 10000, "total_flos": 1.1255918428180738e+19, "train_loss": 0.7365739318847656, "train_runtime": 19473.5524, "train_samples_per_second": 4.108, "train_steps_per_second": 0.514 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1255918428180738e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }