{ "best_metric": 21.28044187798142, "best_model_checkpoint": "/kaggle/working/whisper-small/checkpoint-2000", "epoch": 2.6490066225165565, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.033112582781456956, "grad_norm": 40.76376724243164, "learning_rate": 4.0000000000000003e-07, "loss": 4.304, "step": 25 }, { "epoch": 0.06622516556291391, "grad_norm": 34.9359245300293, "learning_rate": 9.000000000000001e-07, "loss": 3.9709, "step": 50 }, { "epoch": 0.09933774834437085, "grad_norm": 18.766077041625977, "learning_rate": 1.4000000000000001e-06, "loss": 3.4858, "step": 75 }, { "epoch": 0.13245033112582782, "grad_norm": 16.810834884643555, "learning_rate": 1.9000000000000002e-06, "loss": 3.2396, "step": 100 }, { "epoch": 0.16556291390728478, "grad_norm": 19.717323303222656, "learning_rate": 2.4000000000000003e-06, "loss": 3.1781, "step": 125 }, { "epoch": 0.1986754966887417, "grad_norm": 17.67849349975586, "learning_rate": 2.9e-06, "loss": 3.2006, "step": 150 }, { "epoch": 0.23178807947019867, "grad_norm": 16.326919555664062, "learning_rate": 3.4000000000000005e-06, "loss": 3.146, "step": 175 }, { "epoch": 0.26490066225165565, "grad_norm": 17.752124786376953, "learning_rate": 3.900000000000001e-06, "loss": 3.1915, "step": 200 }, { "epoch": 0.2980132450331126, "grad_norm": 17.477537155151367, "learning_rate": 4.4e-06, "loss": 3.0694, "step": 225 }, { "epoch": 0.33112582781456956, "grad_norm": 17.810102462768555, "learning_rate": 4.9000000000000005e-06, "loss": 3.0796, "step": 250 }, { "epoch": 0.36423841059602646, "grad_norm": 18.251075744628906, "learning_rate": 5.400000000000001e-06, "loss": 3.0895, "step": 275 }, { "epoch": 0.3973509933774834, "grad_norm": 17.90201187133789, "learning_rate": 5.9e-06, "loss": 2.9872, "step": 300 }, { "epoch": 0.4304635761589404, "grad_norm": 18.006668090820312, "learning_rate": 6.4000000000000006e-06, "loss": 3.0425, "step": 325 }, { "epoch": 0.46357615894039733, "grad_norm": 16.625247955322266, "learning_rate": 6.9e-06, "loss": 2.9893, "step": 350 }, { "epoch": 0.4966887417218543, "grad_norm": 19.76040267944336, "learning_rate": 7.4e-06, "loss": 2.9231, "step": 375 }, { "epoch": 0.5298013245033113, "grad_norm": 21.64992904663086, "learning_rate": 7.9e-06, "loss": 3.0319, "step": 400 }, { "epoch": 0.5629139072847682, "grad_norm": 18.87610626220703, "learning_rate": 8.400000000000001e-06, "loss": 2.9039, "step": 425 }, { "epoch": 0.5960264900662252, "grad_norm": 18.467191696166992, "learning_rate": 8.900000000000001e-06, "loss": 2.9607, "step": 450 }, { "epoch": 0.6291390728476821, "grad_norm": 17.798429489135742, "learning_rate": 9.4e-06, "loss": 2.9454, "step": 475 }, { "epoch": 0.6622516556291391, "grad_norm": 18.094806671142578, "learning_rate": 9.9e-06, "loss": 2.9388, "step": 500 }, { "epoch": 0.6622516556291391, "eval_cer": 102.70901330655285, "eval_loss": 2.9817957878112793, "eval_runtime": 2198.2774, "eval_samples_per_second": 2.264, "eval_steps_per_second": 0.283, "step": 500 }, { "epoch": 0.695364238410596, "grad_norm": 16.370372772216797, "learning_rate": 9.600000000000001e-06, "loss": 2.9295, "step": 525 }, { "epoch": 0.7284768211920529, "grad_norm": 18.59728240966797, "learning_rate": 9.100000000000001e-06, "loss": 2.9214, "step": 550 }, { "epoch": 0.7615894039735099, "grad_norm": 16.144062042236328, "learning_rate": 8.6e-06, "loss": 2.9778, "step": 575 }, { "epoch": 0.7947019867549668, "grad_norm": 17.924297332763672, "learning_rate": 8.1e-06, "loss": 2.8858, "step": 600 }, { "epoch": 0.8278145695364238, "grad_norm": 17.39291000366211, "learning_rate": 7.600000000000001e-06, "loss": 2.8875, "step": 625 }, { "epoch": 0.8609271523178808, "grad_norm": 17.19579315185547, "learning_rate": 7.100000000000001e-06, "loss": 2.9667, "step": 650 }, { "epoch": 0.8940397350993378, "grad_norm": 17.705387115478516, "learning_rate": 6.600000000000001e-06, "loss": 2.9193, "step": 675 }, { "epoch": 0.9271523178807947, "grad_norm": 18.197153091430664, "learning_rate": 6.1e-06, "loss": 2.8616, "step": 700 }, { "epoch": 0.9602649006622517, "grad_norm": 16.632579803466797, "learning_rate": 5.600000000000001e-06, "loss": 2.7889, "step": 725 }, { "epoch": 0.9933774834437086, "grad_norm": 23.52530860900879, "learning_rate": 5.1e-06, "loss": 2.4231, "step": 750 }, { "epoch": 1.0264900662251655, "grad_norm": 18.322874069213867, "learning_rate": 4.600000000000001e-06, "loss": 1.1389, "step": 775 }, { "epoch": 1.0596026490066226, "grad_norm": 17.467435836791992, "learning_rate": 4.1e-06, "loss": 0.8244, "step": 800 }, { "epoch": 1.0927152317880795, "grad_norm": 23.982784271240234, "learning_rate": 3.6000000000000003e-06, "loss": 0.6974, "step": 825 }, { "epoch": 1.1258278145695364, "grad_norm": 15.57445240020752, "learning_rate": 3.1000000000000004e-06, "loss": 0.6625, "step": 850 }, { "epoch": 1.1589403973509933, "grad_norm": 16.174701690673828, "learning_rate": 2.6e-06, "loss": 0.6145, "step": 875 }, { "epoch": 1.1920529801324504, "grad_norm": 17.91811752319336, "learning_rate": 2.1000000000000002e-06, "loss": 0.5941, "step": 900 }, { "epoch": 1.2251655629139073, "grad_norm": 16.29407501220703, "learning_rate": 1.6000000000000001e-06, "loss": 0.6053, "step": 925 }, { "epoch": 1.2582781456953642, "grad_norm": 13.573580741882324, "learning_rate": 1.1e-06, "loss": 0.5533, "step": 950 }, { "epoch": 1.2913907284768211, "grad_norm": 14.268310546875, "learning_rate": 6.000000000000001e-07, "loss": 0.5446, "step": 975 }, { "epoch": 1.3245033112582782, "grad_norm": 13.822824478149414, "learning_rate": 1.0000000000000001e-07, "loss": 0.5492, "step": 1000 }, { "epoch": 1.3245033112582782, "eval_cer": 33.39944765252322, "eval_loss": 0.6020073294639587, "eval_runtime": 2262.2462, "eval_samples_per_second": 2.2, "eval_steps_per_second": 0.275, "step": 1000 }, { "epoch": 1.3576158940397351, "grad_norm": 15.552218437194824, "learning_rate": 6.540000000000001e-06, "loss": 0.501, "step": 1025 }, { "epoch": 1.390728476821192, "grad_norm": 18.500070571899414, "learning_rate": 6.373333333333334e-06, "loss": 0.5625, "step": 1050 }, { "epoch": 1.423841059602649, "grad_norm": 13.384895324707031, "learning_rate": 6.206666666666668e-06, "loss": 0.4785, "step": 1075 }, { "epoch": 1.4569536423841059, "grad_norm": 11.979981422424316, "learning_rate": 6.040000000000001e-06, "loss": 0.4728, "step": 1100 }, { "epoch": 1.490066225165563, "grad_norm": 13.125775337219238, "learning_rate": 5.873333333333334e-06, "loss": 0.4484, "step": 1125 }, { "epoch": 1.5231788079470199, "grad_norm": 10.14282512664795, "learning_rate": 5.713333333333334e-06, "loss": 0.4227, "step": 1150 }, { "epoch": 1.5562913907284768, "grad_norm": 14.51291561126709, "learning_rate": 5.546666666666667e-06, "loss": 0.4419, "step": 1175 }, { "epoch": 1.589403973509934, "grad_norm": 13.863635063171387, "learning_rate": 5.380000000000001e-06, "loss": 0.4429, "step": 1200 }, { "epoch": 1.6225165562913908, "grad_norm": 11.522802352905273, "learning_rate": 5.213333333333334e-06, "loss": 0.3981, "step": 1225 }, { "epoch": 1.6556291390728477, "grad_norm": 13.733115196228027, "learning_rate": 5.046666666666668e-06, "loss": 0.3974, "step": 1250 }, { "epoch": 1.6887417218543046, "grad_norm": 11.775789260864258, "learning_rate": 4.880000000000001e-06, "loss": 0.3978, "step": 1275 }, { "epoch": 1.7218543046357615, "grad_norm": 8.765869140625, "learning_rate": 4.713333333333334e-06, "loss": 0.3812, "step": 1300 }, { "epoch": 1.7549668874172184, "grad_norm": 10.572060585021973, "learning_rate": 4.546666666666667e-06, "loss": 0.4257, "step": 1325 }, { "epoch": 1.7880794701986755, "grad_norm": 11.202813148498535, "learning_rate": 4.38e-06, "loss": 0.3635, "step": 1350 }, { "epoch": 1.8211920529801324, "grad_norm": 10.402491569519043, "learning_rate": 4.213333333333333e-06, "loss": 0.3636, "step": 1375 }, { "epoch": 1.8543046357615895, "grad_norm": 22.270811080932617, "learning_rate": 4.046666666666667e-06, "loss": 0.3696, "step": 1400 }, { "epoch": 1.8874172185430464, "grad_norm": 8.238192558288574, "learning_rate": 3.88e-06, "loss": 0.397, "step": 1425 }, { "epoch": 1.9205298013245033, "grad_norm": 9.551776885986328, "learning_rate": 3.713333333333334e-06, "loss": 0.3747, "step": 1450 }, { "epoch": 1.9536423841059603, "grad_norm": 13.149867057800293, "learning_rate": 3.5466666666666673e-06, "loss": 0.3771, "step": 1475 }, { "epoch": 1.9867549668874172, "grad_norm": 13.081770896911621, "learning_rate": 3.3800000000000007e-06, "loss": 0.352, "step": 1500 }, { "epoch": 1.9867549668874172, "eval_cer": 23.299020838563898, "eval_loss": 0.4214184284210205, "eval_runtime": 2258.7536, "eval_samples_per_second": 2.203, "eval_steps_per_second": 0.275, "step": 1500 }, { "epoch": 2.019867549668874, "grad_norm": 7.450255393981934, "learning_rate": 3.213333333333334e-06, "loss": 0.2911, "step": 1525 }, { "epoch": 2.052980132450331, "grad_norm": 8.604903221130371, "learning_rate": 3.0466666666666666e-06, "loss": 0.2069, "step": 1550 }, { "epoch": 2.0860927152317883, "grad_norm": 5.367754936218262, "learning_rate": 2.88e-06, "loss": 0.2461, "step": 1575 }, { "epoch": 2.119205298013245, "grad_norm": 11.53250789642334, "learning_rate": 2.7133333333333333e-06, "loss": 0.2071, "step": 1600 }, { "epoch": 2.152317880794702, "grad_norm": 9.057580947875977, "learning_rate": 2.5466666666666667e-06, "loss": 0.2255, "step": 1625 }, { "epoch": 2.185430463576159, "grad_norm": 8.253719329833984, "learning_rate": 2.38e-06, "loss": 0.1968, "step": 1650 }, { "epoch": 2.218543046357616, "grad_norm": 10.867476463317871, "learning_rate": 2.2133333333333335e-06, "loss": 0.2177, "step": 1675 }, { "epoch": 2.251655629139073, "grad_norm": 10.779939651489258, "learning_rate": 2.046666666666667e-06, "loss": 0.2191, "step": 1700 }, { "epoch": 2.2847682119205297, "grad_norm": 8.389144897460938, "learning_rate": 1.8800000000000002e-06, "loss": 0.2137, "step": 1725 }, { "epoch": 2.3178807947019866, "grad_norm": 11.38824462890625, "learning_rate": 1.7133333333333336e-06, "loss": 0.2326, "step": 1750 }, { "epoch": 2.3509933774834435, "grad_norm": 10.286229133605957, "learning_rate": 1.546666666666667e-06, "loss": 0.2014, "step": 1775 }, { "epoch": 2.384105960264901, "grad_norm": 9.5010986328125, "learning_rate": 1.3800000000000001e-06, "loss": 0.2121, "step": 1800 }, { "epoch": 2.4172185430463577, "grad_norm": 10.394664764404297, "learning_rate": 1.2133333333333335e-06, "loss": 0.2166, "step": 1825 }, { "epoch": 2.4503311258278146, "grad_norm": 13.732166290283203, "learning_rate": 1.0466666666666669e-06, "loss": 0.2243, "step": 1850 }, { "epoch": 2.4834437086092715, "grad_norm": 7.431657791137695, "learning_rate": 8.8e-07, "loss": 0.2186, "step": 1875 }, { "epoch": 2.5165562913907285, "grad_norm": 5.561306476593018, "learning_rate": 7.133333333333334e-07, "loss": 0.1992, "step": 1900 }, { "epoch": 2.5496688741721854, "grad_norm": 8.894646644592285, "learning_rate": 5.466666666666667e-07, "loss": 0.2195, "step": 1925 }, { "epoch": 2.5827814569536423, "grad_norm": 8.13320541381836, "learning_rate": 3.8e-07, "loss": 0.2235, "step": 1950 }, { "epoch": 2.6158940397350996, "grad_norm": 8.711597442626953, "learning_rate": 2.1333333333333334e-07, "loss": 0.2033, "step": 1975 }, { "epoch": 2.6490066225165565, "grad_norm": 9.557293891906738, "learning_rate": 4.6666666666666674e-08, "loss": 0.2243, "step": 2000 }, { "epoch": 2.6490066225165565, "eval_cer": 21.28044187798142, "eval_loss": 0.38667094707489014, "eval_runtime": 2236.5916, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.278, "step": 2000 }, { "epoch": 2.6490066225165565, "step": 2000, "total_flos": 9.31455866216448e+18, "train_loss": 0.15934584045410155, "train_runtime": 12035.0029, "train_samples_per_second": 2.659, "train_steps_per_second": 0.166 } ], "logging_steps": 25, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.31455866216448e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }