{ "best_metric": 0.18702100678171693, "best_model_checkpoint": "./en-xlsr/checkpoint-6000", "epoch": 30.0, "eval_steps": 600, "global_step": 6450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.47, "learning_rate": 0.0002983555207517619, "loss": 4.4931, "step": 100 }, { "epoch": 0.93, "learning_rate": 0.0002936570086139389, "loss": 2.9634, "step": 200 }, { "epoch": 1.4, "learning_rate": 0.0002889584964761159, "loss": 2.8686, "step": 300 }, { "epoch": 1.86, "learning_rate": 0.00028425998433829287, "loss": 1.9458, "step": 400 }, { "epoch": 2.33, "learning_rate": 0.0002795614722004698, "loss": 0.8991, "step": 500 }, { "epoch": 2.79, "learning_rate": 0.0002748629600626468, "loss": 0.6992, "step": 600 }, { "epoch": 2.79, "eval_cer": 0.13698708949899893, "eval_loss": 0.4980902671813965, "eval_runtime": 22.423, "eval_samples_per_second": 77.51, "eval_steps_per_second": 4.861, "eval_wer": 0.33759717704140707, "step": 600 }, { "epoch": 3.26, "learning_rate": 0.0002701644479248238, "loss": 0.5755, "step": 700 }, { "epoch": 3.72, "learning_rate": 0.0002654659357870008, "loss": 0.502, "step": 800 }, { "epoch": 4.19, "learning_rate": 0.00026076742364917777, "loss": 0.4539, "step": 900 }, { "epoch": 4.65, "learning_rate": 0.0002560689115113547, "loss": 0.4015, "step": 1000 }, { "epoch": 5.12, "learning_rate": 0.0002513703993735317, "loss": 0.3877, "step": 1100 }, { "epoch": 5.58, "learning_rate": 0.0002466718872357087, "loss": 0.3394, "step": 1200 }, { "epoch": 5.58, "eval_cer": 0.10568890525395255, "eval_loss": 0.393413245677948, "eval_runtime": 22.3286, "eval_samples_per_second": 77.838, "eval_steps_per_second": 4.882, "eval_wer": 0.24673319733142196, "step": 1200 }, { "epoch": 6.05, "learning_rate": 0.00024197337509788565, "loss": 0.336, "step": 1300 }, { "epoch": 6.51, "learning_rate": 0.00023727486296006264, "loss": 0.289, "step": 1400 }, { "epoch": 6.98, "learning_rate": 0.0002325763508222396, "loss": 0.2961, "step": 1500 }, { "epoch": 7.44, "learning_rate": 0.0002278778386844166, "loss": 0.2615, "step": 1600 }, { "epoch": 7.91, "learning_rate": 0.00022317932654659358, "loss": 0.2656, "step": 1700 }, { "epoch": 8.37, "learning_rate": 0.00021848081440877052, "loss": 0.2376, "step": 1800 }, { "epoch": 8.37, "eval_cer": 0.10152348514486917, "eval_loss": 0.41225335001945496, "eval_runtime": 22.5992, "eval_samples_per_second": 76.905, "eval_steps_per_second": 4.823, "eval_wer": 0.2355957435077466, "step": 1800 }, { "epoch": 8.84, "learning_rate": 0.0002137823022709475, "loss": 0.227, "step": 1900 }, { "epoch": 9.3, "learning_rate": 0.00020908379013312447, "loss": 0.2208, "step": 2000 }, { "epoch": 9.77, "learning_rate": 0.00020438527799530146, "loss": 0.2109, "step": 2100 }, { "epoch": 10.23, "learning_rate": 0.00019968676585747845, "loss": 0.2003, "step": 2200 }, { "epoch": 10.7, "learning_rate": 0.00019498825371965542, "loss": 0.1948, "step": 2300 }, { "epoch": 11.16, "learning_rate": 0.0001902897415818324, "loss": 0.1877, "step": 2400 }, { "epoch": 11.16, "eval_cer": 0.09280141762363933, "eval_loss": 0.4269295632839203, "eval_runtime": 22.6943, "eval_samples_per_second": 76.583, "eval_steps_per_second": 4.803, "eval_wer": 0.21359651541048685, "step": 2400 }, { "epoch": 11.63, "learning_rate": 0.0001855912294440094, "loss": 0.1777, "step": 2500 }, { "epoch": 12.09, "learning_rate": 0.00018089271730618636, "loss": 0.1749, "step": 2600 }, { "epoch": 12.56, "learning_rate": 0.00017619420516836332, "loss": 0.1607, "step": 2700 }, { "epoch": 13.02, "learning_rate": 0.00017149569303054031, "loss": 0.1595, "step": 2800 }, { "epoch": 13.49, "learning_rate": 0.00016679718089271728, "loss": 0.1513, "step": 2900 }, { "epoch": 13.95, "learning_rate": 0.00016209866875489427, "loss": 0.1494, "step": 3000 }, { "epoch": 13.95, "eval_cer": 0.09215704324212368, "eval_loss": 0.4648122489452362, "eval_runtime": 22.977, "eval_samples_per_second": 75.641, "eval_steps_per_second": 4.744, "eval_wer": 0.21023322489937696, "step": 3000 }, { "epoch": 14.42, "learning_rate": 0.00015740015661707126, "loss": 0.1414, "step": 3100 }, { "epoch": 14.88, "learning_rate": 0.00015270164447924822, "loss": 0.1363, "step": 3200 }, { "epoch": 15.35, "learning_rate": 0.00014800313234142518, "loss": 0.1319, "step": 3300 }, { "epoch": 15.81, "learning_rate": 0.00014330462020360218, "loss": 0.1244, "step": 3400 }, { "epoch": 16.28, "learning_rate": 0.00013860610806577917, "loss": 0.1194, "step": 3500 }, { "epoch": 16.74, "learning_rate": 0.00013390759592795613, "loss": 0.1186, "step": 3600 }, { "epoch": 16.74, "eval_cer": 0.09190389616367109, "eval_loss": 0.48351120948791504, "eval_runtime": 22.4373, "eval_samples_per_second": 77.46, "eval_steps_per_second": 4.858, "eval_wer": 0.20576721618790317, "step": 3600 }, { "epoch": 17.21, "learning_rate": 0.00012920908379013312, "loss": 0.118, "step": 3700 }, { "epoch": 17.67, "learning_rate": 0.00012451057165231008, "loss": 0.1098, "step": 3800 }, { "epoch": 18.14, "learning_rate": 0.00011981205951448707, "loss": 0.1048, "step": 3900 }, { "epoch": 18.6, "learning_rate": 0.00011511354737666405, "loss": 0.1051, "step": 4000 }, { "epoch": 19.07, "learning_rate": 0.00011041503523884103, "loss": 0.1014, "step": 4100 }, { "epoch": 19.53, "learning_rate": 0.000105716523101018, "loss": 0.0966, "step": 4200 }, { "epoch": 19.53, "eval_cer": 0.08753135571767197, "eval_loss": 0.49859338998794556, "eval_runtime": 22.5733, "eval_samples_per_second": 76.994, "eval_steps_per_second": 4.829, "eval_wer": 0.19782764514528312, "step": 4200 }, { "epoch": 20.0, "learning_rate": 0.00010101801096319498, "loss": 0.097, "step": 4300 }, { "epoch": 20.47, "learning_rate": 9.631949882537196e-05, "loss": 0.09, "step": 4400 }, { "epoch": 20.93, "learning_rate": 9.162098668754895e-05, "loss": 0.0904, "step": 4500 }, { "epoch": 21.4, "learning_rate": 8.692247454972591e-05, "loss": 0.0822, "step": 4600 }, { "epoch": 21.86, "learning_rate": 8.222396241190289e-05, "loss": 0.0869, "step": 4700 }, { "epoch": 22.33, "learning_rate": 7.752545027407988e-05, "loss": 0.083, "step": 4800 }, { "epoch": 22.33, "eval_cer": 0.08620808689848802, "eval_loss": 0.5179353952407837, "eval_runtime": 22.6454, "eval_samples_per_second": 76.748, "eval_steps_per_second": 4.813, "eval_wer": 0.192700005513591, "step": 4800 }, { "epoch": 22.79, "learning_rate": 7.282693813625684e-05, "loss": 0.0817, "step": 4900 }, { "epoch": 23.26, "learning_rate": 6.812842599843383e-05, "loss": 0.0774, "step": 5000 }, { "epoch": 23.72, "learning_rate": 6.34299138606108e-05, "loss": 0.0743, "step": 5100 }, { "epoch": 24.19, "learning_rate": 5.873140172278778e-05, "loss": 0.073, "step": 5200 }, { "epoch": 24.65, "learning_rate": 5.4032889584964754e-05, "loss": 0.0699, "step": 5300 }, { "epoch": 25.12, "learning_rate": 4.933437744714174e-05, "loss": 0.071, "step": 5400 }, { "epoch": 25.12, "eval_cer": 0.08569028605619865, "eval_loss": 0.5539018511772156, "eval_runtime": 22.6635, "eval_samples_per_second": 76.687, "eval_steps_per_second": 4.81, "eval_wer": 0.19082538457297238, "step": 5400 }, { "epoch": 25.58, "learning_rate": 4.463586530931871e-05, "loss": 0.0706, "step": 5500 }, { "epoch": 26.05, "learning_rate": 3.993735317149569e-05, "loss": 0.0696, "step": 5600 }, { "epoch": 26.51, "learning_rate": 3.523884103367267e-05, "loss": 0.0702, "step": 5700 }, { "epoch": 26.98, "learning_rate": 3.0540328895849644e-05, "loss": 0.0656, "step": 5800 }, { "epoch": 27.44, "learning_rate": 2.584181675802662e-05, "loss": 0.0662, "step": 5900 }, { "epoch": 27.91, "learning_rate": 2.1143304620203598e-05, "loss": 0.0648, "step": 6000 }, { "epoch": 27.91, "eval_cer": 0.08437852392239892, "eval_loss": 0.5582728981971741, "eval_runtime": 22.4539, "eval_samples_per_second": 77.403, "eval_steps_per_second": 4.854, "eval_wer": 0.18702100678171693, "step": 6000 }, { "epoch": 28.37, "learning_rate": 1.6444792482380578e-05, "loss": 0.0622, "step": 6100 }, { "epoch": 28.84, "learning_rate": 1.1746280344557556e-05, "loss": 0.0602, "step": 6200 }, { "epoch": 29.3, "learning_rate": 7.047768206734534e-06, "loss": 0.06, "step": 6300 }, { "epoch": 29.77, "learning_rate": 2.349256068911511e-06, "loss": 0.0615, "step": 6400 }, { "epoch": 30.0, "step": 6450, "total_flos": 4.472815672614715e+19, "train_loss": 0.3642665664539781, "train_runtime": 8377.9265, "train_samples_per_second": 49.251, "train_steps_per_second": 0.77 } ], "logging_steps": 100, "max_steps": 6450, "num_train_epochs": 30, "save_steps": 600, "total_flos": 4.472815672614715e+19, "trial_name": null, "trial_params": null }