{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.996463098325867, "eval_steps": 2000, "global_step": 31800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 0.00014879999999999998, "loss": 14.6823, "step": 500 }, { "epoch": 0.47, "learning_rate": 0.0002985, "loss": 4.0589, "step": 1000 }, { "epoch": 0.71, "learning_rate": 0.0002951785714285714, "loss": 1.959, "step": 1500 }, { "epoch": 0.94, "learning_rate": 0.00029030844155844153, "loss": 1.5961, "step": 2000 }, { "epoch": 0.94, "eval_cer": 0.27122803037904003, "eval_loss": 1.068536400794983, "eval_runtime": 2688.7775, "eval_samples_per_second": 6.291, "eval_steps_per_second": 0.393, "step": 2000 }, { "epoch": 1.18, "learning_rate": 0.00028543831168831166, "loss": 1.4159, "step": 2500 }, { "epoch": 1.41, "learning_rate": 0.0002805681818181818, "loss": 1.3166, "step": 3000 }, { "epoch": 1.65, "learning_rate": 0.0002757077922077922, "loss": 1.2577, "step": 3500 }, { "epoch": 1.89, "learning_rate": 0.0002708376623376623, "loss": 1.2068, "step": 4000 }, { "epoch": 1.89, "eval_cer": 0.21168921247442604, "eval_loss": 0.8059473037719727, "eval_runtime": 2690.7607, "eval_samples_per_second": 6.286, "eval_steps_per_second": 0.393, "step": 4000 }, { "epoch": 2.12, "learning_rate": 0.00026596753246753245, "loss": 1.1463, "step": 4500 }, { "epoch": 2.36, "learning_rate": 0.0002610974025974026, "loss": 1.0865, "step": 5000 }, { "epoch": 2.59, "learning_rate": 0.000256237012987013, "loss": 1.0765, "step": 5500 }, { "epoch": 2.83, "learning_rate": 0.0002513668831168831, "loss": 1.0519, "step": 6000 }, { "epoch": 2.83, "eval_cer": 0.17837652734942439, "eval_loss": 0.6763827800750732, "eval_runtime": 2755.5593, "eval_samples_per_second": 6.138, "eval_steps_per_second": 0.384, "step": 6000 }, { "epoch": 3.07, "learning_rate": 0.00024649675324675324, "loss": 1.0204, "step": 6500 }, { "epoch": 3.3, "learning_rate": 0.00024162662337662335, "loss": 0.9806, "step": 7000 }, { "epoch": 3.54, "learning_rate": 0.0002367564935064935, "loss": 0.9693, "step": 7500 }, { "epoch": 3.77, "learning_rate": 0.00023189610389610387, "loss": 0.9522, "step": 8000 }, { "epoch": 3.77, "eval_cer": 0.1666790265096821, "eval_loss": 0.6237545609474182, "eval_runtime": 2657.2446, "eval_samples_per_second": 6.365, "eval_steps_per_second": 0.398, "step": 8000 }, { "epoch": 4.01, "learning_rate": 0.000227025974025974, "loss": 0.9543, "step": 8500 }, { "epoch": 4.24, "learning_rate": 0.00022215584415584413, "loss": 0.9079, "step": 9000 }, { "epoch": 4.48, "learning_rate": 0.00021728571428571427, "loss": 0.9006, "step": 9500 }, { "epoch": 4.72, "learning_rate": 0.00021242532467532466, "loss": 0.8855, "step": 10000 }, { "epoch": 4.72, "eval_cer": 0.15723901464423282, "eval_loss": 0.5901318788528442, "eval_runtime": 2755.8864, "eval_samples_per_second": 6.137, "eval_steps_per_second": 0.384, "step": 10000 }, { "epoch": 4.95, "learning_rate": 0.0002075551948051948, "loss": 0.8858, "step": 10500 }, { "epoch": 5.19, "learning_rate": 0.00020268506493506492, "loss": 0.8449, "step": 11000 }, { "epoch": 5.42, "learning_rate": 0.0001978246753246753, "loss": 0.8309, "step": 11500 }, { "epoch": 5.66, "learning_rate": 0.00019295454545454545, "loss": 0.8353, "step": 12000 }, { "epoch": 5.66, "eval_cer": 0.14726462133076249, "eval_loss": 0.5560417175292969, "eval_runtime": 2734.9608, "eval_samples_per_second": 6.184, "eval_steps_per_second": 0.387, "step": 12000 }, { "epoch": 5.89, "learning_rate": 0.00018808441558441555, "loss": 0.8255, "step": 12500 }, { "epoch": 6.13, "learning_rate": 0.0001832142857142857, "loss": 0.808, "step": 13000 }, { "epoch": 6.37, "learning_rate": 0.00017834415584415584, "loss": 0.7827, "step": 13500 }, { "epoch": 6.6, "learning_rate": 0.00017347402597402595, "loss": 0.7765, "step": 14000 }, { "epoch": 6.6, "eval_cer": 0.14182047400725015, "eval_loss": 0.5313007831573486, "eval_runtime": 2764.1566, "eval_samples_per_second": 6.119, "eval_steps_per_second": 0.383, "step": 14000 }, { "epoch": 6.84, "learning_rate": 0.00016861363636363634, "loss": 0.7766, "step": 14500 }, { "epoch": 7.07, "learning_rate": 0.00016375324675324673, "loss": 0.7511, "step": 15000 }, { "epoch": 7.31, "learning_rate": 0.0001588831168831169, "loss": 0.7287, "step": 15500 }, { "epoch": 7.55, "learning_rate": 0.000154012987012987, "loss": 0.7333, "step": 16000 }, { "epoch": 7.55, "eval_cer": 0.13388763594009692, "eval_loss": 0.5099530220031738, "eval_runtime": 2760.9388, "eval_samples_per_second": 6.126, "eval_steps_per_second": 0.383, "step": 16000 }, { "epoch": 7.78, "learning_rate": 0.00014914285714285713, "loss": 0.7285, "step": 16500 }, { "epoch": 8.02, "learning_rate": 0.00014427272727272726, "loss": 0.719, "step": 17000 }, { "epoch": 8.25, "learning_rate": 0.0001394025974025974, "loss": 0.6828, "step": 17500 }, { "epoch": 8.49, "learning_rate": 0.00013453246753246753, "loss": 0.6887, "step": 18000 }, { "epoch": 8.49, "eval_cer": 0.13040652250727414, "eval_loss": 0.49017849564552307, "eval_runtime": 2752.9478, "eval_samples_per_second": 6.144, "eval_steps_per_second": 0.384, "step": 18000 }, { "epoch": 8.72, "learning_rate": 0.00012966233766233766, "loss": 0.6893, "step": 18500 }, { "epoch": 8.96, "learning_rate": 0.00012480194805194805, "loss": 0.6834, "step": 19000 }, { "epoch": 9.2, "learning_rate": 0.00011993181818181817, "loss": 0.6572, "step": 19500 }, { "epoch": 9.43, "learning_rate": 0.00011506168831168829, "loss": 0.6547, "step": 20000 }, { "epoch": 9.43, "eval_cer": 0.12521102614324914, "eval_loss": 0.47851961851119995, "eval_runtime": 2750.0743, "eval_samples_per_second": 6.15, "eval_steps_per_second": 0.385, "step": 20000 }, { "epoch": 9.67, "learning_rate": 0.00011019155844155843, "loss": 0.6438, "step": 20500 }, { "epoch": 9.9, "learning_rate": 0.00010533116883116881, "loss": 0.6419, "step": 21000 }, { "epoch": 10.14, "learning_rate": 0.00010046103896103896, "loss": 0.6238, "step": 21500 }, { "epoch": 10.37, "learning_rate": 9.560064935064933e-05, "loss": 0.612, "step": 22000 }, { "epoch": 10.37, "eval_cer": 0.1199806313989452, "eval_loss": 0.45936959981918335, "eval_runtime": 2703.4007, "eval_samples_per_second": 6.257, "eval_steps_per_second": 0.391, "step": 22000 }, { "epoch": 10.61, "learning_rate": 9.073051948051948e-05, "loss": 0.6138, "step": 22500 }, { "epoch": 10.85, "learning_rate": 8.58603896103896e-05, "loss": 0.6128, "step": 23000 }, { "epoch": 11.08, "learning_rate": 8.099025974025973e-05, "loss": 0.6003, "step": 23500 }, { "epoch": 11.32, "learning_rate": 7.612987012987012e-05, "loss": 0.5855, "step": 24000 }, { "epoch": 11.32, "eval_cer": 0.1176489833665595, "eval_loss": 0.4468931555747986, "eval_runtime": 2737.3716, "eval_samples_per_second": 6.179, "eval_steps_per_second": 0.387, "step": 24000 }, { "epoch": 11.55, "learning_rate": 7.125974025974026e-05, "loss": 0.5766, "step": 24500 }, { "epoch": 11.79, "learning_rate": 6.638961038961039e-05, "loss": 0.5821, "step": 25000 }, { "epoch": 12.03, "learning_rate": 6.151948051948051e-05, "loss": 0.5765, "step": 25500 }, { "epoch": 12.26, "learning_rate": 5.664935064935064e-05, "loss": 0.5538, "step": 26000 }, { "epoch": 12.26, "eval_cer": 0.11556380514489371, "eval_loss": 0.4398212134838104, "eval_runtime": 2705.0289, "eval_samples_per_second": 6.253, "eval_steps_per_second": 0.391, "step": 26000 }, { "epoch": 12.5, "learning_rate": 5.178896103896104e-05, "loss": 0.5612, "step": 26500 }, { "epoch": 12.73, "learning_rate": 4.691883116883116e-05, "loss": 0.5511, "step": 27000 }, { "epoch": 12.97, "learning_rate": 4.2048701298701296e-05, "loss": 0.5497, "step": 27500 }, { "epoch": 13.2, "learning_rate": 3.717857142857143e-05, "loss": 0.5341, "step": 28000 }, { "epoch": 13.2, "eval_cer": 0.11241640747348813, "eval_loss": 0.43176981806755066, "eval_runtime": 2707.434, "eval_samples_per_second": 6.247, "eval_steps_per_second": 0.391, "step": 28000 }, { "epoch": 13.44, "learning_rate": 3.231818181818181e-05, "loss": 0.5317, "step": 28500 }, { "epoch": 13.68, "learning_rate": 2.7448051948051945e-05, "loss": 0.5315, "step": 29000 }, { "epoch": 13.91, "learning_rate": 2.2577922077922077e-05, "loss": 0.5265, "step": 29500 }, { "epoch": 14.15, "learning_rate": 1.7707792207792207e-05, "loss": 0.5229, "step": 30000 }, { "epoch": 14.15, "eval_cer": 0.11112952970070276, "eval_loss": 0.4251420199871063, "eval_runtime": 2738.5837, "eval_samples_per_second": 6.176, "eval_steps_per_second": 0.386, "step": 30000 }, { "epoch": 14.38, "learning_rate": 1.2837662337662336e-05, "loss": 0.5171, "step": 30500 }, { "epoch": 14.62, "learning_rate": 7.967532467532467e-06, "loss": 0.5147, "step": 31000 }, { "epoch": 14.85, "learning_rate": 3.1071428571428566e-06, "loss": 0.5162, "step": 31500 }, { "epoch": 15.0, "step": 31800, "total_flos": 2.1355952953477825e+20, "train_loss": 1.0674577844967632, "train_runtime": 77454.0437, "train_samples_per_second": 26.282, "train_steps_per_second": 0.411 } ], "logging_steps": 500, "max_steps": 31800, "num_train_epochs": 15, "save_steps": 2000, "total_flos": 2.1355952953477825e+20, "trial_name": null, "trial_params": null }