{ "best_metric": 0.1746993511915207, "best_model_checkpoint": "./checkpoint-8000", "epoch": 69.56521739130434, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.87, "learning_rate": 1.0304e-05, "loss": 6.0688, "step": 100 }, { "epoch": 1.74, "learning_rate": 2.0204000000000002e-05, "loss": 3.1889, "step": 200 }, { "epoch": 2.61, "learning_rate": 3.0104e-05, "loss": 2.2092, "step": 300 }, { "epoch": 3.48, "learning_rate": 4.000399999999999e-05, "loss": 1.5574, "step": 400 }, { "epoch": 4.35, "learning_rate": 4.9903999999999995e-05, "loss": 1.3719, "step": 500 }, { "epoch": 4.35, "eval_cer": 0.08328164981226813, "eval_loss": 0.3388712704181671, "eval_runtime": 199.6026, "eval_samples_per_second": 29.068, "eval_steps_per_second": 0.456, "eval_wer": 0.42359243003445185, "step": 500 }, { "epoch": 5.22, "learning_rate": 5.9804e-05, "loss": 1.3634, "step": 600 }, { "epoch": 6.09, "learning_rate": 6.970400000000001e-05, "loss": 1.3345, "step": 700 }, { "epoch": 6.96, "learning_rate": 7.960399999999999e-05, "loss": 1.2407, "step": 800 }, { "epoch": 7.83, "learning_rate": 8e-05, "loss": 1.172, "step": 900 }, { "epoch": 8.7, "learning_rate": 8e-05, "loss": 1.1361, "step": 1000 }, { "epoch": 8.7, "eval_cer": 0.06299695098173931, "eval_loss": 0.23091697692871094, "eval_runtime": 209.236, "eval_samples_per_second": 27.729, "eval_steps_per_second": 0.435, "eval_wer": 0.31617820567275473, "step": 1000 }, { "epoch": 9.57, "learning_rate": 8e-05, "loss": 1.0819, "step": 1100 }, { "epoch": 10.43, "learning_rate": 8e-05, "loss": 1.0923, "step": 1200 }, { "epoch": 11.3, "learning_rate": 8e-05, "loss": 1.0803, "step": 1300 }, { "epoch": 12.17, "learning_rate": 8e-05, "loss": 1.0719, "step": 1400 }, { "epoch": 13.04, "learning_rate": 8e-05, "loss": 1.0517, "step": 1500 }, { "epoch": 13.04, "eval_cer": 0.059732533136255095, "eval_loss": 0.21664097905158997, "eval_runtime": 198.1012, "eval_samples_per_second": 29.288, "eval_steps_per_second": 0.459, "eval_wer": 0.3056066827127283, "step": 1500 }, { "epoch": 13.91, "learning_rate": 8e-05, "loss": 1.0523, "step": 1600 }, { "epoch": 14.78, "learning_rate": 8e-05, "loss": 1.0414, "step": 1700 }, { "epoch": 15.65, "learning_rate": 8e-05, "loss": 1.031, "step": 1800 }, { "epoch": 16.52, "learning_rate": 8e-05, "loss": 1.0126, "step": 1900 }, { "epoch": 17.39, "learning_rate": 8e-05, "loss": 1.0118, "step": 2000 }, { "epoch": 17.39, "eval_cer": 0.055684506456416864, "eval_loss": 0.21414823830127716, "eval_runtime": 204.5017, "eval_samples_per_second": 28.371, "eval_steps_per_second": 0.445, "eval_wer": 0.2783991693803389, "step": 2000 }, { "epoch": 18.26, "learning_rate": 8e-05, "loss": 1.0243, "step": 2100 }, { "epoch": 19.13, "learning_rate": 8e-05, "loss": 1.0084, "step": 2200 }, { "epoch": 20.0, "learning_rate": 8e-05, "loss": 1.0074, "step": 2300 }, { "epoch": 20.87, "learning_rate": 8e-05, "loss": 0.9933, "step": 2400 }, { "epoch": 21.74, "learning_rate": 8e-05, "loss": 0.9922, "step": 2500 }, { "epoch": 21.74, "eval_cer": 0.05935372696988491, "eval_loss": 0.22312845289707184, "eval_runtime": 198.4087, "eval_samples_per_second": 29.243, "eval_steps_per_second": 0.459, "eval_wer": 0.29411487092359245, "step": 2500 }, { "epoch": 22.61, "learning_rate": 8e-05, "loss": 0.9838, "step": 2600 }, { "epoch": 23.48, "learning_rate": 8e-05, "loss": 1.019, "step": 2700 }, { "epoch": 24.35, "learning_rate": 8e-05, "loss": 0.9897, "step": 2800 }, { "epoch": 25.22, "learning_rate": 8e-05, "loss": 0.9897, "step": 2900 }, { "epoch": 26.09, "learning_rate": 8e-05, "loss": 0.9929, "step": 3000 }, { "epoch": 26.09, "eval_cer": 0.05865182142631663, "eval_loss": 0.21711210906505585, "eval_runtime": 199.7486, "eval_samples_per_second": 29.047, "eval_steps_per_second": 0.456, "eval_wer": 0.28915946953608, "step": 3000 }, { "epoch": 26.96, "learning_rate": 8e-05, "loss": 0.9971, "step": 3100 }, { "epoch": 27.83, "learning_rate": 8e-05, "loss": 0.9621, "step": 3200 }, { "epoch": 28.7, "learning_rate": 8e-05, "loss": 0.9744, "step": 3300 }, { "epoch": 29.57, "learning_rate": 8e-05, "loss": 0.9587, "step": 3400 }, { "epoch": 30.43, "learning_rate": 8e-05, "loss": 0.9485, "step": 3500 }, { "epoch": 30.43, "eval_cer": 0.05992565000538499, "eval_loss": 0.2236333191394806, "eval_runtime": 197.9762, "eval_samples_per_second": 29.307, "eval_steps_per_second": 0.46, "eval_wer": 0.29560149133984615, "step": 3500 }, { "epoch": 31.3, "learning_rate": 8e-05, "loss": 0.9468, "step": 3600 }, { "epoch": 32.17, "learning_rate": 8e-05, "loss": 0.9598, "step": 3700 }, { "epoch": 33.04, "learning_rate": 8e-05, "loss": 0.9575, "step": 3800 }, { "epoch": 33.91, "learning_rate": 8e-05, "loss": 0.9491, "step": 3900 }, { "epoch": 34.78, "learning_rate": 8e-05, "loss": 0.9573, "step": 4000 }, { "epoch": 34.78, "eval_cer": 0.061589426108657946, "eval_loss": 0.2313707023859024, "eval_runtime": 198.2807, "eval_samples_per_second": 29.262, "eval_steps_per_second": 0.459, "eval_wer": 0.304285242342725, "step": 4000 }, { "epoch": 35.65, "learning_rate": 7.8176e-05, "loss": 0.9251, "step": 4100 }, { "epoch": 36.52, "learning_rate": 7.627600000000001e-05, "loss": 0.9299, "step": 4200 }, { "epoch": 37.39, "learning_rate": 7.437600000000001e-05, "loss": 0.928, "step": 4300 }, { "epoch": 38.26, "learning_rate": 7.2476e-05, "loss": 0.9303, "step": 4400 }, { "epoch": 39.13, "learning_rate": 7.057600000000001e-05, "loss": 0.9195, "step": 4500 }, { "epoch": 39.13, "eval_cer": 0.058024191601644466, "eval_loss": 0.21694457530975342, "eval_runtime": 198.1273, "eval_samples_per_second": 29.284, "eval_steps_per_second": 0.459, "eval_wer": 0.2812308273160602, "step": 4500 }, { "epoch": 40.0, "learning_rate": 6.8676e-05, "loss": 0.9182, "step": 4600 }, { "epoch": 40.87, "learning_rate": 6.6776e-05, "loss": 0.8885, "step": 4700 }, { "epoch": 41.74, "learning_rate": 6.487600000000001e-05, "loss": 0.9037, "step": 4800 }, { "epoch": 42.61, "learning_rate": 6.2976e-05, "loss": 0.8929, "step": 4900 }, { "epoch": 43.48, "learning_rate": 6.1076e-05, "loss": 0.8915, "step": 5000 }, { "epoch": 43.48, "eval_cer": 0.05601503340550457, "eval_loss": 0.21094831824302673, "eval_runtime": 198.7631, "eval_samples_per_second": 29.191, "eval_steps_per_second": 0.458, "eval_wer": 0.2779508235405163, "step": 5000 }, { "epoch": 44.35, "learning_rate": 5.9176000000000004e-05, "loss": 0.8687, "step": 5100 }, { "epoch": 45.22, "learning_rate": 5.7276000000000005e-05, "loss": 0.8635, "step": 5200 }, { "epoch": 46.09, "learning_rate": 5.5376e-05, "loss": 0.859, "step": 5300 }, { "epoch": 46.96, "learning_rate": 5.3476e-05, "loss": 0.8586, "step": 5400 }, { "epoch": 47.83, "learning_rate": 5.157600000000001e-05, "loss": 0.8449, "step": 5500 }, { "epoch": 47.83, "eval_cer": 0.05136537340260782, "eval_loss": 0.20504631102085114, "eval_runtime": 198.4879, "eval_samples_per_second": 29.231, "eval_steps_per_second": 0.458, "eval_wer": 0.25338619094813347, "step": 5500 }, { "epoch": 48.7, "learning_rate": 4.9676000000000003e-05, "loss": 0.8345, "step": 5600 }, { "epoch": 49.57, "learning_rate": 4.7776e-05, "loss": 0.8229, "step": 5700 }, { "epoch": 50.43, "learning_rate": 4.5876000000000006e-05, "loss": 0.8203, "step": 5800 }, { "epoch": 51.3, "learning_rate": 4.397600000000001e-05, "loss": 0.8084, "step": 5900 }, { "epoch": 52.17, "learning_rate": 4.207600000000001e-05, "loss": 0.8028, "step": 6000 }, { "epoch": 52.17, "eval_cer": 0.04915567076544842, "eval_loss": 0.2032497674226761, "eval_runtime": 199.0329, "eval_samples_per_second": 29.151, "eval_steps_per_second": 0.457, "eval_wer": 0.24562272877436406, "step": 6000 }, { "epoch": 53.04, "learning_rate": 4.0176e-05, "loss": 0.7975, "step": 6100 }, { "epoch": 53.91, "learning_rate": 3.8295000000000005e-05, "loss": 0.7942, "step": 6200 }, { "epoch": 54.78, "learning_rate": 3.6395e-05, "loss": 0.7867, "step": 6300 }, { "epoch": 55.65, "learning_rate": 3.4495e-05, "loss": 0.787, "step": 6400 }, { "epoch": 56.52, "learning_rate": 3.2595e-05, "loss": 0.7881, "step": 6500 }, { "epoch": 56.52, "eval_cer": 0.04694596812828902, "eval_loss": 0.18896546959877014, "eval_runtime": 199.9061, "eval_samples_per_second": 29.024, "eval_steps_per_second": 0.455, "eval_wer": 0.2380008494973807, "step": 6500 }, { "epoch": 57.39, "learning_rate": 3.0695e-05, "loss": 0.7608, "step": 6600 }, { "epoch": 58.26, "learning_rate": 2.8795000000000005e-05, "loss": 0.7542, "step": 6700 }, { "epoch": 59.13, "learning_rate": 2.6895e-05, "loss": 0.755, "step": 6800 }, { "epoch": 60.0, "learning_rate": 2.4995000000000004e-05, "loss": 0.7569, "step": 6900 }, { "epoch": 60.87, "learning_rate": 2.3095e-05, "loss": 0.7423, "step": 7000 }, { "epoch": 60.87, "eval_cer": 0.04418662517129838, "eval_loss": 0.18159770965576172, "eval_runtime": 198.6364, "eval_samples_per_second": 29.209, "eval_steps_per_second": 0.458, "eval_wer": 0.22452687715323988, "step": 7000 }, { "epoch": 61.74, "learning_rate": 2.1195000000000006e-05, "loss": 0.7409, "step": 7100 }, { "epoch": 62.61, "learning_rate": 1.9295e-05, "loss": 0.7208, "step": 7200 }, { "epoch": 63.48, "learning_rate": 1.7395e-05, "loss": 0.7488, "step": 7300 }, { "epoch": 64.35, "learning_rate": 1.5495000000000003e-05, "loss": 0.7134, "step": 7400 }, { "epoch": 65.22, "learning_rate": 1.3594999999999998e-05, "loss": 0.7248, "step": 7500 }, { "epoch": 65.22, "eval_cer": 0.042203463476772125, "eval_loss": 0.17892056703567505, "eval_runtime": 197.8292, "eval_samples_per_second": 29.328, "eval_steps_per_second": 0.46, "eval_wer": 0.21650384633536268, "step": 7500 }, { "epoch": 66.09, "learning_rate": 1.1695000000000002e-05, "loss": 0.7076, "step": 7600 }, { "epoch": 66.96, "learning_rate": 9.794999999999999e-06, "loss": 0.7132, "step": 7700 }, { "epoch": 67.83, "learning_rate": 7.895000000000003e-06, "loss": 0.6972, "step": 7800 }, { "epoch": 68.7, "learning_rate": 5.994999999999999e-06, "loss": 0.7019, "step": 7900 }, { "epoch": 69.57, "learning_rate": 4.095000000000005e-06, "loss": 0.6993, "step": 8000 }, { "epoch": 69.57, "eval_cer": 0.040792224817745956, "eval_loss": 0.1746993511915207, "eval_runtime": 198.0151, "eval_samples_per_second": 29.301, "eval_steps_per_second": 0.46, "eval_wer": 0.21072254471659824, "step": 8000 }, { "epoch": 69.57, "step": 8000, "total_flos": 3.5433976605211066e+20, "train_loss": 1.039798891067505, "train_runtime": 49145.562, "train_samples_per_second": 20.836, "train_steps_per_second": 0.163 } ], "max_steps": 8000, "num_train_epochs": 70, "total_flos": 3.5433976605211066e+20, "trial_name": null, "trial_params": null }