|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.996463098325867, |
|
"eval_steps": 2000, |
|
"global_step": 31800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014879999999999998, |
|
"loss": 14.5956, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002985, |
|
"loss": 3.7568, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002951785714285714, |
|
"loss": 1.8728, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00029030844155844153, |
|
"loss": 1.5566, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_cer": 0.2632472070390033, |
|
"eval_loss": 1.0225796699523926, |
|
"eval_runtime": 2668.5503, |
|
"eval_samples_per_second": 6.338, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 0.618374838459038, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00028543831168831166, |
|
"loss": 1.3765, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0002805681818181818, |
|
"loss": 1.2884, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0002757077922077922, |
|
"loss": 1.2363, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002708376623376623, |
|
"loss": 1.179, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_cer": 0.20008986332921824, |
|
"eval_loss": 0.7681829333305359, |
|
"eval_runtime": 2604.8981, |
|
"eval_samples_per_second": 6.493, |
|
"eval_steps_per_second": 0.406, |
|
"eval_wer": 0.49895741661315, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00026596753246753245, |
|
"loss": 1.1289, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0002610974025974026, |
|
"loss": 1.0784, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.000256237012987013, |
|
"loss": 1.0664, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002513668831168831, |
|
"loss": 1.0432, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_cer": 0.17490413851167133, |
|
"eval_loss": 0.6633431315422058, |
|
"eval_runtime": 2540.6608, |
|
"eval_samples_per_second": 6.657, |
|
"eval_steps_per_second": 0.416, |
|
"eval_wer": 0.45160906690768976, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00024649675324675324, |
|
"loss": 1.0052, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00024162662337662335, |
|
"loss": 0.9657, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0002367564935064935, |
|
"loss": 0.9627, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00023189610389610387, |
|
"loss": 0.9413, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_cer": 0.16238652573537432, |
|
"eval_loss": 0.615895688533783, |
|
"eval_runtime": 2544.7169, |
|
"eval_samples_per_second": 6.647, |
|
"eval_steps_per_second": 0.416, |
|
"eval_wer": 0.4259210807982304, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.000227025974025974, |
|
"loss": 0.9393, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00022215584415584413, |
|
"loss": 0.8932, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00021728571428571427, |
|
"loss": 0.8898, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00021242532467532466, |
|
"loss": 0.8765, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_cer": 0.15377316925278206, |
|
"eval_loss": 0.5792471766471863, |
|
"eval_runtime": 2541.286, |
|
"eval_samples_per_second": 6.656, |
|
"eval_steps_per_second": 0.416, |
|
"eval_wer": 0.40610010386191914, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0002075551948051948, |
|
"loss": 0.874, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00020268506493506492, |
|
"loss": 0.8393, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0001978246753246753, |
|
"loss": 0.8254, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00019295454545454545, |
|
"loss": 0.8248, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_cer": 0.14464288051230823, |
|
"eval_loss": 0.5455637574195862, |
|
"eval_runtime": 2603.5132, |
|
"eval_samples_per_second": 6.497, |
|
"eval_steps_per_second": 0.406, |
|
"eval_wer": 0.3876665953111497, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00018808441558441555, |
|
"loss": 0.815, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0001832142857142857, |
|
"loss": 0.7914, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00017834415584415584, |
|
"loss": 0.7721, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00017347402597402595, |
|
"loss": 0.7714, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_cer": 0.1396851293639334, |
|
"eval_loss": 0.5316255688667297, |
|
"eval_runtime": 2697.2289, |
|
"eval_samples_per_second": 6.271, |
|
"eval_steps_per_second": 0.392, |
|
"eval_wer": 0.37104868824774634, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00016861363636363634, |
|
"loss": 0.7775, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00016375324675324673, |
|
"loss": 0.7576, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.0001588831168831169, |
|
"loss": 0.74, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.000154012987012987, |
|
"loss": 0.7388, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_cer": 0.13560201887129913, |
|
"eval_loss": 0.5172015428543091, |
|
"eval_runtime": 2608.9857, |
|
"eval_samples_per_second": 6.483, |
|
"eval_steps_per_second": 0.406, |
|
"eval_wer": 0.3657208096472659, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00014914285714285713, |
|
"loss": 0.7315, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.00014427272727272726, |
|
"loss": 0.725, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.0001394025974025974, |
|
"loss": 0.6876, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.00013453246753246753, |
|
"loss": 0.6912, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_cer": 0.12909128980051213, |
|
"eval_loss": 0.4891507625579834, |
|
"eval_runtime": 2622.6389, |
|
"eval_samples_per_second": 6.449, |
|
"eval_steps_per_second": 0.403, |
|
"eval_wer": 0.35079164981883626, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.00012966233766233766, |
|
"loss": 0.6912, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.00012480194805194805, |
|
"loss": 0.6847, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.00011993181818181817, |
|
"loss": 0.6578, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.00011506168831168829, |
|
"loss": 0.6549, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_cer": 0.12405937959404459, |
|
"eval_loss": 0.4693571627140045, |
|
"eval_runtime": 2599.1484, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 0.407, |
|
"eval_wer": 0.33972361629760006, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.00011019155844155843, |
|
"loss": 0.646, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.00010533116883116881, |
|
"loss": 0.6467, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.00010046103896103896, |
|
"loss": 0.6287, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 9.560064935064933e-05, |
|
"loss": 0.614, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_cer": 0.12051065054943137, |
|
"eval_loss": 0.461481511592865, |
|
"eval_runtime": 2605.4751, |
|
"eval_samples_per_second": 6.492, |
|
"eval_steps_per_second": 0.406, |
|
"eval_wer": 0.33093895931942696, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 9.073051948051948e-05, |
|
"loss": 0.6153, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 8.58603896103896e-05, |
|
"loss": 0.6157, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 8.099025974025973e-05, |
|
"loss": 0.6054, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 7.612987012987012e-05, |
|
"loss": 0.5901, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_cer": 0.1176620702591641, |
|
"eval_loss": 0.4488585889339447, |
|
"eval_runtime": 2619.3349, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.404, |
|
"eval_wer": 0.32152003107929183, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 7.125974025974026e-05, |
|
"loss": 0.5808, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 6.638961038961039e-05, |
|
"loss": 0.5845, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 6.151948051948051e-05, |
|
"loss": 0.5777, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 5.664935064935064e-05, |
|
"loss": 0.555, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_cer": 0.11478513503492019, |
|
"eval_loss": 0.4419253170490265, |
|
"eval_runtime": 2591.0294, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 0.408, |
|
"eval_wer": 0.31628729316810567, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 5.178896103896104e-05, |
|
"loss": 0.5641, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 4.691883116883116e-05, |
|
"loss": 0.5552, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 4.2048701298701296e-05, |
|
"loss": 0.5523, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.717857142857143e-05, |
|
"loss": 0.5377, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"eval_cer": 0.11216557536523336, |
|
"eval_loss": 0.4320293366909027, |
|
"eval_runtime": 2509.0221, |
|
"eval_samples_per_second": 6.741, |
|
"eval_steps_per_second": 0.422, |
|
"eval_wer": 0.3102855013517906, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 3.231818181818181e-05, |
|
"loss": 0.5329, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 2.7448051948051945e-05, |
|
"loss": 0.533, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 2.2577922077922077e-05, |
|
"loss": 0.5289, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.7707792207792207e-05, |
|
"loss": 0.5253, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_cer": 0.11016764309426488, |
|
"eval_loss": 0.4250529706478119, |
|
"eval_runtime": 2522.5601, |
|
"eval_samples_per_second": 6.705, |
|
"eval_steps_per_second": 0.419, |
|
"eval_wer": 0.3051637609114478, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 1.2837662337662336e-05, |
|
"loss": 0.5196, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 7.967532467532467e-06, |
|
"loss": 0.5163, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 3.1071428571428566e-06, |
|
"loss": 0.5197, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 31800, |
|
"total_flos": 2.1355952953477825e+20, |
|
"train_loss": 1.0558512441767087, |
|
"train_runtime": 76407.9259, |
|
"train_samples_per_second": 26.642, |
|
"train_steps_per_second": 0.416 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 31800, |
|
"num_train_epochs": 15, |
|
"save_steps": 2000, |
|
"total_flos": 2.1355952953477825e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|