|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 28120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17780938833570412, |
|
"grad_norm": 1.8023470640182495, |
|
"learning_rate": 0.0002988, |
|
"loss": 4.6629, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17780938833570412, |
|
"eval_loss": 0.3540094494819641, |
|
"eval_runtime": 77.5992, |
|
"eval_samples_per_second": 19.059, |
|
"eval_steps_per_second": 2.384, |
|
"eval_wer": 0.5421303656597775, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35561877667140823, |
|
"grad_norm": 1.411382794380188, |
|
"learning_rate": 0.0002945908761766835, |
|
"loss": 0.6579, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35561877667140823, |
|
"eval_loss": 0.25881391763687134, |
|
"eval_runtime": 77.5785, |
|
"eval_samples_per_second": 19.065, |
|
"eval_steps_per_second": 2.385, |
|
"eval_wer": 0.48825295884119413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5334281650071123, |
|
"grad_norm": 1.561378836631775, |
|
"learning_rate": 0.00028916002896451845, |
|
"loss": 0.591, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5334281650071123, |
|
"eval_loss": 0.25524866580963135, |
|
"eval_runtime": 77.3789, |
|
"eval_samples_per_second": 19.114, |
|
"eval_steps_per_second": 2.391, |
|
"eval_wer": 0.47200141317788374, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7112375533428165, |
|
"grad_norm": 1.4678888320922852, |
|
"learning_rate": 0.00028372918175235333, |
|
"loss": 0.5467, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7112375533428165, |
|
"eval_loss": 0.23701371252536774, |
|
"eval_runtime": 77.4845, |
|
"eval_samples_per_second": 19.088, |
|
"eval_steps_per_second": 2.388, |
|
"eval_wer": 0.4541600423953365, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8890469416785206, |
|
"grad_norm": 2.583188056945801, |
|
"learning_rate": 0.00027829833454018826, |
|
"loss": 0.5405, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8890469416785206, |
|
"eval_loss": 0.23755988478660583, |
|
"eval_runtime": 78.0759, |
|
"eval_samples_per_second": 18.943, |
|
"eval_steps_per_second": 2.369, |
|
"eval_wer": 0.45557322027910263, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0668563300142249, |
|
"grad_norm": 1.422250509262085, |
|
"learning_rate": 0.00027286748732802314, |
|
"loss": 0.5027, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0668563300142249, |
|
"eval_loss": 0.22338581085205078, |
|
"eval_runtime": 77.6024, |
|
"eval_samples_per_second": 19.059, |
|
"eval_steps_per_second": 2.384, |
|
"eval_wer": 0.4306659600777248, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2446657183499288, |
|
"grad_norm": 1.006721019744873, |
|
"learning_rate": 0.00026743664011585807, |
|
"loss": 0.5001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2446657183499288, |
|
"eval_loss": 0.21763387322425842, |
|
"eval_runtime": 77.5866, |
|
"eval_samples_per_second": 19.063, |
|
"eval_steps_per_second": 2.384, |
|
"eval_wer": 0.42130365659777425, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.422475106685633, |
|
"grad_norm": 1.3477824926376343, |
|
"learning_rate": 0.00026200579290369295, |
|
"loss": 0.4962, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.422475106685633, |
|
"eval_loss": 0.21994474530220032, |
|
"eval_runtime": 77.7546, |
|
"eval_samples_per_second": 19.021, |
|
"eval_steps_per_second": 2.379, |
|
"eval_wer": 0.4205087440381558, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.600284495021337, |
|
"grad_norm": 1.08402419090271, |
|
"learning_rate": 0.00025657494569152783, |
|
"loss": 0.486, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.600284495021337, |
|
"eval_loss": 0.21454988420009613, |
|
"eval_runtime": 77.6412, |
|
"eval_samples_per_second": 19.049, |
|
"eval_steps_per_second": 2.383, |
|
"eval_wer": 0.41671082847553437, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.7780938833570412, |
|
"grad_norm": 0.5935032963752747, |
|
"learning_rate": 0.00025114409847936276, |
|
"loss": 0.47, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.7780938833570412, |
|
"eval_loss": 0.21590569615364075, |
|
"eval_runtime": 77.5792, |
|
"eval_samples_per_second": 19.064, |
|
"eval_steps_per_second": 2.385, |
|
"eval_wer": 0.4168874757110051, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.9559032716927454, |
|
"grad_norm": 1.4301828145980835, |
|
"learning_rate": 0.00024571325126719764, |
|
"loss": 0.4557, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9559032716927454, |
|
"eval_loss": 0.2098698765039444, |
|
"eval_runtime": 78.1464, |
|
"eval_samples_per_second": 18.926, |
|
"eval_steps_per_second": 2.367, |
|
"eval_wer": 0.4135311782370606, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.1337126600284497, |
|
"grad_norm": 1.2725244760513306, |
|
"learning_rate": 0.00024028240405503257, |
|
"loss": 0.4514, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.1337126600284497, |
|
"eval_loss": 0.20907503366470337, |
|
"eval_runtime": 78.1315, |
|
"eval_samples_per_second": 18.93, |
|
"eval_steps_per_second": 2.368, |
|
"eval_wer": 0.4099982335276453, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.3115220483641536, |
|
"grad_norm": 1.0271036624908447, |
|
"learning_rate": 0.00023485155684286748, |
|
"loss": 0.4539, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.3115220483641536, |
|
"eval_loss": 0.2038286179304123, |
|
"eval_runtime": 77.8532, |
|
"eval_samples_per_second": 18.997, |
|
"eval_steps_per_second": 2.376, |
|
"eval_wer": 0.40160748984278394, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.4893314366998576, |
|
"grad_norm": 1.0436575412750244, |
|
"learning_rate": 0.0002294315713251267, |
|
"loss": 0.439, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.4893314366998576, |
|
"eval_loss": 0.20407754182815552, |
|
"eval_runtime": 78.101, |
|
"eval_samples_per_second": 18.937, |
|
"eval_steps_per_second": 2.369, |
|
"eval_wer": 0.4024907260201378, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.667140825035562, |
|
"grad_norm": 1.0521398782730103, |
|
"learning_rate": 0.00022400072411296162, |
|
"loss": 0.4378, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.667140825035562, |
|
"eval_loss": 0.20021408796310425, |
|
"eval_runtime": 78.1969, |
|
"eval_samples_per_second": 18.914, |
|
"eval_steps_per_second": 2.366, |
|
"eval_wer": 0.39162692103868574, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.844950213371266, |
|
"grad_norm": 0.716386616230011, |
|
"learning_rate": 0.0002185698769007965, |
|
"loss": 0.4347, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.844950213371266, |
|
"eval_loss": 0.19606797397136688, |
|
"eval_runtime": 78.0563, |
|
"eval_samples_per_second": 18.948, |
|
"eval_steps_per_second": 2.37, |
|
"eval_wer": 0.39109697933227344, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.0227596017069702, |
|
"grad_norm": 0.6210708022117615, |
|
"learning_rate": 0.0002131390296886314, |
|
"loss": 0.4278, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.0227596017069702, |
|
"eval_loss": 0.1994515061378479, |
|
"eval_runtime": 78.0396, |
|
"eval_samples_per_second": 18.952, |
|
"eval_steps_per_second": 2.371, |
|
"eval_wer": 0.3923335099805688, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.200568990042674, |
|
"grad_norm": 0.4629976153373718, |
|
"learning_rate": 0.0002077081824764663, |
|
"loss": 0.4117, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.200568990042674, |
|
"eval_loss": 0.19594013690948486, |
|
"eval_runtime": 77.8452, |
|
"eval_samples_per_second": 18.999, |
|
"eval_steps_per_second": 2.377, |
|
"eval_wer": 0.38915385974209504, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.3783783783783785, |
|
"grad_norm": 0.9285233020782471, |
|
"learning_rate": 0.0002022773352643012, |
|
"loss": 0.4149, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.3783783783783785, |
|
"eval_loss": 0.1925920844078064, |
|
"eval_runtime": 77.8068, |
|
"eval_samples_per_second": 19.009, |
|
"eval_steps_per_second": 2.378, |
|
"eval_wer": 0.3858858858858859, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.5561877667140824, |
|
"grad_norm": 1.0625221729278564, |
|
"learning_rate": 0.00019684648805213612, |
|
"loss": 0.4148, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.5561877667140824, |
|
"eval_loss": 0.1958448737859726, |
|
"eval_runtime": 77.9401, |
|
"eval_samples_per_second": 18.976, |
|
"eval_steps_per_second": 2.374, |
|
"eval_wer": 0.38040982158629216, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.733997155049787, |
|
"grad_norm": 0.7951219081878662, |
|
"learning_rate": 0.00019142650253439536, |
|
"loss": 0.4009, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.733997155049787, |
|
"eval_loss": 0.19297942519187927, |
|
"eval_runtime": 77.4746, |
|
"eval_samples_per_second": 19.09, |
|
"eval_steps_per_second": 2.388, |
|
"eval_wer": 0.37899664370252606, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.9118065433854907, |
|
"grad_norm": 0.689900279045105, |
|
"learning_rate": 0.00018599565532223026, |
|
"loss": 0.4174, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.9118065433854907, |
|
"eval_loss": 0.19552023708820343, |
|
"eval_runtime": 77.7104, |
|
"eval_samples_per_second": 19.032, |
|
"eval_steps_per_second": 2.381, |
|
"eval_wer": 0.3822646175587352, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.089615931721195, |
|
"grad_norm": 0.5333609580993652, |
|
"learning_rate": 0.00018056480811006514, |
|
"loss": 0.4012, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.089615931721195, |
|
"eval_loss": 0.19501054286956787, |
|
"eval_runtime": 77.674, |
|
"eval_samples_per_second": 19.041, |
|
"eval_steps_per_second": 2.382, |
|
"eval_wer": 0.3812047341459106, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.2674253200568995, |
|
"grad_norm": 1.5428721904754639, |
|
"learning_rate": 0.00017513396089790005, |
|
"loss": 0.3974, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.2674253200568995, |
|
"eval_loss": 0.19340351223945618, |
|
"eval_runtime": 78.0603, |
|
"eval_samples_per_second": 18.947, |
|
"eval_steps_per_second": 2.37, |
|
"eval_wer": 0.3773184949655538, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.445234708392603, |
|
"grad_norm": 0.44063669443130493, |
|
"learning_rate": 0.00016970311368573495, |
|
"loss": 0.3943, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.445234708392603, |
|
"eval_loss": 0.18450064957141876, |
|
"eval_runtime": 78.009, |
|
"eval_samples_per_second": 18.959, |
|
"eval_steps_per_second": 2.372, |
|
"eval_wer": 0.37201907790143085, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.623044096728307, |
|
"grad_norm": 0.9376386404037476, |
|
"learning_rate": 0.0001642831281679942, |
|
"loss": 0.4071, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.623044096728307, |
|
"eval_loss": 0.1920066624879837, |
|
"eval_runtime": 77.3116, |
|
"eval_samples_per_second": 19.13, |
|
"eval_steps_per_second": 2.393, |
|
"eval_wer": 0.3838544426779721, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.800853485064011, |
|
"grad_norm": 0.6557429432868958, |
|
"learning_rate": 0.0001588522809558291, |
|
"loss": 0.3968, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.800853485064011, |
|
"eval_loss": 0.1866944283246994, |
|
"eval_runtime": 77.5081, |
|
"eval_samples_per_second": 19.082, |
|
"eval_steps_per_second": 2.387, |
|
"eval_wer": 0.37431549196255076, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.978662873399715, |
|
"grad_norm": 0.5748176574707031, |
|
"learning_rate": 0.000153421433743664, |
|
"loss": 0.3795, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.978662873399715, |
|
"eval_loss": 0.18717192113399506, |
|
"eval_runtime": 77.4836, |
|
"eval_samples_per_second": 19.088, |
|
"eval_steps_per_second": 2.388, |
|
"eval_wer": 0.37131248895954777, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.15647226173542, |
|
"grad_norm": 0.8843936920166016, |
|
"learning_rate": 0.0001479905865314989, |
|
"loss": 0.3856, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.15647226173542, |
|
"eval_loss": 0.18692350387573242, |
|
"eval_runtime": 77.4161, |
|
"eval_samples_per_second": 19.105, |
|
"eval_steps_per_second": 2.39, |
|
"eval_wer": 0.3736972266384031, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.334281650071124, |
|
"grad_norm": 1.5647237300872803, |
|
"learning_rate": 0.0001425597393193338, |
|
"loss": 0.3706, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.334281650071124, |
|
"eval_loss": 0.19033658504486084, |
|
"eval_runtime": 77.561, |
|
"eval_samples_per_second": 19.069, |
|
"eval_steps_per_second": 2.385, |
|
"eval_wer": 0.37661190602367073, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.512091038406828, |
|
"grad_norm": 2.1427793502807617, |
|
"learning_rate": 0.00013713975380159305, |
|
"loss": 0.3784, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.512091038406828, |
|
"eval_loss": 0.18607346713542938, |
|
"eval_runtime": 77.668, |
|
"eval_samples_per_second": 19.043, |
|
"eval_steps_per_second": 2.382, |
|
"eval_wer": 0.3683094859565448, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.689900426742532, |
|
"grad_norm": 2.697434186935425, |
|
"learning_rate": 0.00013170890658942793, |
|
"loss": 0.3777, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.689900426742532, |
|
"eval_loss": 0.1866033524274826, |
|
"eval_runtime": 77.9873, |
|
"eval_samples_per_second": 18.965, |
|
"eval_steps_per_second": 2.372, |
|
"eval_wer": 0.37131248895954777, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.867709815078236, |
|
"grad_norm": 1.5033948421478271, |
|
"learning_rate": 0.00012627805937726286, |
|
"loss": 0.3861, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.867709815078236, |
|
"eval_loss": 0.18121445178985596, |
|
"eval_runtime": 77.6152, |
|
"eval_samples_per_second": 19.056, |
|
"eval_steps_per_second": 2.384, |
|
"eval_wer": 0.3637166578343049, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.0455192034139404, |
|
"grad_norm": 0.5671353936195374, |
|
"learning_rate": 0.00012084721216509774, |
|
"loss": 0.3711, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.0455192034139404, |
|
"eval_loss": 0.18417000770568848, |
|
"eval_runtime": 77.4756, |
|
"eval_samples_per_second": 19.09, |
|
"eval_steps_per_second": 2.388, |
|
"eval_wer": 0.3667196608373079, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.223328591749644, |
|
"grad_norm": 1.0996285676956177, |
|
"learning_rate": 0.00011542722664735697, |
|
"loss": 0.374, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.223328591749644, |
|
"eval_loss": 0.18148785829544067, |
|
"eval_runtime": 77.7259, |
|
"eval_samples_per_second": 19.028, |
|
"eval_steps_per_second": 2.38, |
|
"eval_wer": 0.3617735382441265, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.401137980085348, |
|
"grad_norm": 0.6861454844474792, |
|
"learning_rate": 0.00010999637943519187, |
|
"loss": 0.3539, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.401137980085348, |
|
"eval_loss": 0.18153779208660126, |
|
"eval_runtime": 77.7594, |
|
"eval_samples_per_second": 19.02, |
|
"eval_steps_per_second": 2.379, |
|
"eval_wer": 0.3646882176293941, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.578947368421053, |
|
"grad_norm": 1.423963189125061, |
|
"learning_rate": 0.00010456553222302678, |
|
"loss": 0.3625, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.578947368421053, |
|
"eval_loss": 0.17849859595298767, |
|
"eval_runtime": 77.8982, |
|
"eval_samples_per_second": 18.986, |
|
"eval_steps_per_second": 2.375, |
|
"eval_wer": 0.3588588588588589, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.756756756756757, |
|
"grad_norm": 0.9670858383178711, |
|
"learning_rate": 9.913468501086169e-05, |
|
"loss": 0.3599, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.756756756756757, |
|
"eval_loss": 0.17952215671539307, |
|
"eval_runtime": 77.3124, |
|
"eval_samples_per_second": 19.13, |
|
"eval_steps_per_second": 2.393, |
|
"eval_wer": 0.362126832715068, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.934566145092461, |
|
"grad_norm": 0.523705005645752, |
|
"learning_rate": 9.370383779869658e-05, |
|
"loss": 0.3654, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.934566145092461, |
|
"eval_loss": 0.1822131723165512, |
|
"eval_runtime": 77.744, |
|
"eval_samples_per_second": 19.024, |
|
"eval_steps_per_second": 2.38, |
|
"eval_wer": 0.36239180356827416, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.112375533428165, |
|
"grad_norm": 1.0047301054000854, |
|
"learning_rate": 8.82729905865315e-05, |
|
"loss": 0.3693, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.112375533428165, |
|
"eval_loss": 0.17921391129493713, |
|
"eval_runtime": 77.3373, |
|
"eval_samples_per_second": 19.124, |
|
"eval_steps_per_second": 2.392, |
|
"eval_wer": 0.3611552729199788, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.290184921763869, |
|
"grad_norm": 0.6277859807014465, |
|
"learning_rate": 8.285300506879071e-05, |
|
"loss": 0.3519, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.290184921763869, |
|
"eval_loss": 0.18002206087112427, |
|
"eval_runtime": 77.6246, |
|
"eval_samples_per_second": 19.053, |
|
"eval_steps_per_second": 2.383, |
|
"eval_wer": 0.36751457339692634, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.467994310099574, |
|
"grad_norm": 0.8712663650512695, |
|
"learning_rate": 7.743301955104996e-05, |
|
"loss": 0.3553, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.467994310099574, |
|
"eval_loss": 0.1808168739080429, |
|
"eval_runtime": 77.4403, |
|
"eval_samples_per_second": 19.099, |
|
"eval_steps_per_second": 2.389, |
|
"eval_wer": 0.36398162868751105, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.6458036984352775, |
|
"grad_norm": 1.000291109085083, |
|
"learning_rate": 7.200217233888485e-05, |
|
"loss": 0.3451, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.6458036984352775, |
|
"eval_loss": 0.18079166114330292, |
|
"eval_runtime": 77.717, |
|
"eval_samples_per_second": 19.031, |
|
"eval_steps_per_second": 2.38, |
|
"eval_wer": 0.36195018547959723, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.823613086770981, |
|
"grad_norm": 0.9150896668434143, |
|
"learning_rate": 6.657132512671976e-05, |
|
"loss": 0.3558, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.823613086770981, |
|
"eval_loss": 0.17938227951526642, |
|
"eval_runtime": 77.6629, |
|
"eval_samples_per_second": 19.044, |
|
"eval_steps_per_second": 2.382, |
|
"eval_wer": 0.36097862568450806, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.001422475106686, |
|
"grad_norm": 0.4225611686706543, |
|
"learning_rate": 6.114047791455467e-05, |
|
"loss": 0.3595, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.001422475106686, |
|
"eval_loss": 0.17718034982681274, |
|
"eval_runtime": 78.0056, |
|
"eval_samples_per_second": 18.96, |
|
"eval_steps_per_second": 2.372, |
|
"eval_wer": 0.3576223282105635, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.17923186344239, |
|
"grad_norm": 0.7559336423873901, |
|
"learning_rate": 5.570963070238957e-05, |
|
"loss": 0.3404, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.17923186344239, |
|
"eval_loss": 0.17881204187870026, |
|
"eval_runtime": 78.2489, |
|
"eval_samples_per_second": 18.901, |
|
"eval_steps_per_second": 2.364, |
|
"eval_wer": 0.35806394629924043, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.357041251778094, |
|
"grad_norm": 1.2298369407653809, |
|
"learning_rate": 5.027878349022447e-05, |
|
"loss": 0.3593, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.357041251778094, |
|
"eval_loss": 0.1782107651233673, |
|
"eval_runtime": 77.8969, |
|
"eval_samples_per_second": 18.987, |
|
"eval_steps_per_second": 2.375, |
|
"eval_wer": 0.357975622681505, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.534850640113799, |
|
"grad_norm": 0.5965376496315002, |
|
"learning_rate": 4.4847936278059375e-05, |
|
"loss": 0.3471, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.534850640113799, |
|
"eval_loss": 0.17967215180397034, |
|
"eval_runtime": 77.691, |
|
"eval_samples_per_second": 19.037, |
|
"eval_steps_per_second": 2.381, |
|
"eval_wer": 0.3606253312135665, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.712660028449502, |
|
"grad_norm": 0.3367222249507904, |
|
"learning_rate": 3.941708906589427e-05, |
|
"loss": 0.3497, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.712660028449502, |
|
"eval_loss": 0.17775095999240875, |
|
"eval_runtime": 78.061, |
|
"eval_samples_per_second": 18.947, |
|
"eval_steps_per_second": 2.37, |
|
"eval_wer": 0.35877053524112346, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.890469416785207, |
|
"grad_norm": 1.0665998458862305, |
|
"learning_rate": 3.399710354815351e-05, |
|
"loss": 0.3398, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.890469416785207, |
|
"eval_loss": 0.1774686574935913, |
|
"eval_runtime": 77.6438, |
|
"eval_samples_per_second": 19.049, |
|
"eval_steps_per_second": 2.383, |
|
"eval_wer": 0.3583289171524466, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 9.06827880512091, |
|
"grad_norm": 1.8358111381530762, |
|
"learning_rate": 2.8566256335988413e-05, |
|
"loss": 0.3444, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.06827880512091, |
|
"eval_loss": 0.1796201765537262, |
|
"eval_runtime": 77.7985, |
|
"eval_samples_per_second": 19.011, |
|
"eval_steps_per_second": 2.378, |
|
"eval_wer": 0.35859388800565273, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.246088193456615, |
|
"grad_norm": 1.32257878780365, |
|
"learning_rate": 2.3135409123823315e-05, |
|
"loss": 0.3366, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.246088193456615, |
|
"eval_loss": 0.1784891039133072, |
|
"eval_runtime": 77.5756, |
|
"eval_samples_per_second": 19.065, |
|
"eval_steps_per_second": 2.385, |
|
"eval_wer": 0.35735735735735735, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.42389758179232, |
|
"grad_norm": 1.8535629510879517, |
|
"learning_rate": 1.7704561911658217e-05, |
|
"loss": 0.3434, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.42389758179232, |
|
"eval_loss": 0.17805208265781403, |
|
"eval_runtime": 77.8845, |
|
"eval_samples_per_second": 18.99, |
|
"eval_steps_per_second": 2.375, |
|
"eval_wer": 0.3592121533298004, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.601706970128022, |
|
"grad_norm": 1.9332554340362549, |
|
"learning_rate": 1.228457639391745e-05, |
|
"loss": 0.3426, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 9.601706970128022, |
|
"eval_loss": 0.17857009172439575, |
|
"eval_runtime": 77.3945, |
|
"eval_samples_per_second": 19.11, |
|
"eval_steps_per_second": 2.39, |
|
"eval_wer": 0.35930047694753575, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 9.779516358463727, |
|
"grad_norm": 1.1602191925048828, |
|
"learning_rate": 6.8537291817523524e-06, |
|
"loss": 0.3496, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 9.779516358463727, |
|
"eval_loss": 0.17868547141551971, |
|
"eval_runtime": 77.5791, |
|
"eval_samples_per_second": 19.064, |
|
"eval_steps_per_second": 2.385, |
|
"eval_wer": 0.3590355060943296, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 9.95732574679943, |
|
"grad_norm": 1.2770110368728638, |
|
"learning_rate": 1.4228819695872554e-06, |
|
"loss": 0.334, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 9.95732574679943, |
|
"eval_loss": 0.17876511812210083, |
|
"eval_runtime": 78.0274, |
|
"eval_samples_per_second": 18.955, |
|
"eval_steps_per_second": 2.371, |
|
"eval_wer": 0.35877053524112346, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 28120, |
|
"total_flos": 2.1329860467760157e+19, |
|
"train_loss": 0.48163046043254915, |
|
"train_runtime": 15300.4436, |
|
"train_samples_per_second": 7.349, |
|
"train_steps_per_second": 1.838 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 28120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1329860467760157e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|