{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 13.204432916764913,
  "eval_steps": 2000,
  "global_step": 28000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24,
      "learning_rate": 0.00014879999999999998,
      "loss": 14.6823,
      "step": 500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002985,
      "loss": 4.0589,
      "step": 1000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0002951785714285714,
      "loss": 1.959,
      "step": 1500
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00029030844155844153,
      "loss": 1.5961,
      "step": 2000
    },
    {
      "epoch": 0.94,
      "eval_cer": 0.27122803037904003,
      "eval_loss": 1.068536400794983,
      "eval_runtime": 2688.7775,
      "eval_samples_per_second": 6.291,
      "eval_steps_per_second": 0.393,
      "step": 2000
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.00028543831168831166,
      "loss": 1.4159,
      "step": 2500
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.0002805681818181818,
      "loss": 1.3166,
      "step": 3000
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.0002757077922077922,
      "loss": 1.2577,
      "step": 3500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002708376623376623,
      "loss": 1.2068,
      "step": 4000
    },
    {
      "epoch": 1.89,
      "eval_cer": 0.21168921247442604,
      "eval_loss": 0.8059473037719727,
      "eval_runtime": 2690.7607,
      "eval_samples_per_second": 6.286,
      "eval_steps_per_second": 0.393,
      "step": 4000
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.00026596753246753245,
      "loss": 1.1463,
      "step": 4500
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.0002610974025974026,
      "loss": 1.0865,
      "step": 5000
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.000256237012987013,
      "loss": 1.0765,
      "step": 5500
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.0002513668831168831,
      "loss": 1.0519,
      "step": 6000
    },
    {
      "epoch": 2.83,
      "eval_cer": 0.17837652734942439,
      "eval_loss": 0.6763827800750732,
      "eval_runtime": 2755.5593,
      "eval_samples_per_second": 6.138,
      "eval_steps_per_second": 0.384,
      "step": 6000
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.00024649675324675324,
      "loss": 1.0204,
      "step": 6500
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.00024162662337662335,
      "loss": 0.9806,
      "step": 7000
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.0002367564935064935,
      "loss": 0.9693,
      "step": 7500
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.00023189610389610387,
      "loss": 0.9522,
      "step": 8000
    },
    {
      "epoch": 3.77,
      "eval_cer": 0.1666790265096821,
      "eval_loss": 0.6237545609474182,
      "eval_runtime": 2657.2446,
      "eval_samples_per_second": 6.365,
      "eval_steps_per_second": 0.398,
      "step": 8000
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.000227025974025974,
      "loss": 0.9543,
      "step": 8500
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.00022215584415584413,
      "loss": 0.9079,
      "step": 9000
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.00021728571428571427,
      "loss": 0.9006,
      "step": 9500
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.00021242532467532466,
      "loss": 0.8855,
      "step": 10000
    },
    {
      "epoch": 4.72,
      "eval_cer": 0.15723901464423282,
      "eval_loss": 0.5901318788528442,
      "eval_runtime": 2755.8864,
      "eval_samples_per_second": 6.137,
      "eval_steps_per_second": 0.384,
      "step": 10000
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.0002075551948051948,
      "loss": 0.8858,
      "step": 10500
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.00020268506493506492,
      "loss": 0.8449,
      "step": 11000
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.0001978246753246753,
      "loss": 0.8309,
      "step": 11500
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.00019295454545454545,
      "loss": 0.8353,
      "step": 12000
    },
    {
      "epoch": 5.66,
      "eval_cer": 0.14726462133076249,
      "eval_loss": 0.5560417175292969,
      "eval_runtime": 2734.9608,
      "eval_samples_per_second": 6.184,
      "eval_steps_per_second": 0.387,
      "step": 12000
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.00018808441558441555,
      "loss": 0.8255,
      "step": 12500
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.0001832142857142857,
      "loss": 0.808,
      "step": 13000
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.00017834415584415584,
      "loss": 0.7827,
      "step": 13500
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.00017347402597402595,
      "loss": 0.7765,
      "step": 14000
    },
    {
      "epoch": 6.6,
      "eval_cer": 0.14182047400725015,
      "eval_loss": 0.5313007831573486,
      "eval_runtime": 2764.1566,
      "eval_samples_per_second": 6.119,
      "eval_steps_per_second": 0.383,
      "step": 14000
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.00016861363636363634,
      "loss": 0.7766,
      "step": 14500
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.00016375324675324673,
      "loss": 0.7511,
      "step": 15000
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.0001588831168831169,
      "loss": 0.7287,
      "step": 15500
    },
    {
      "epoch": 7.55,
      "learning_rate": 0.000154012987012987,
      "loss": 0.7333,
      "step": 16000
    },
    {
      "epoch": 7.55,
      "eval_cer": 0.13388763594009692,
      "eval_loss": 0.5099530220031738,
      "eval_runtime": 2760.9388,
      "eval_samples_per_second": 6.126,
      "eval_steps_per_second": 0.383,
      "step": 16000
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.00014914285714285713,
      "loss": 0.7285,
      "step": 16500
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.00014427272727272726,
      "loss": 0.719,
      "step": 17000
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.0001394025974025974,
      "loss": 0.6828,
      "step": 17500
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.00013453246753246753,
      "loss": 0.6887,
      "step": 18000
    },
    {
      "epoch": 8.49,
      "eval_cer": 0.13040652250727414,
      "eval_loss": 0.49017849564552307,
      "eval_runtime": 2752.9478,
      "eval_samples_per_second": 6.144,
      "eval_steps_per_second": 0.384,
      "step": 18000
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.00012966233766233766,
      "loss": 0.6893,
      "step": 18500
    },
    {
      "epoch": 8.96,
      "learning_rate": 0.00012480194805194805,
      "loss": 0.6834,
      "step": 19000
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.00011993181818181817,
      "loss": 0.6572,
      "step": 19500
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.00011506168831168829,
      "loss": 0.6547,
      "step": 20000
    },
    {
      "epoch": 9.43,
      "eval_cer": 0.12521102614324914,
      "eval_loss": 0.47851961851119995,
      "eval_runtime": 2750.0743,
      "eval_samples_per_second": 6.15,
      "eval_steps_per_second": 0.385,
      "step": 20000
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.00011019155844155843,
      "loss": 0.6438,
      "step": 20500
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.00010533116883116881,
      "loss": 0.6419,
      "step": 21000
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.00010046103896103896,
      "loss": 0.6238,
      "step": 21500
    },
    {
      "epoch": 10.37,
      "learning_rate": 9.560064935064933e-05,
      "loss": 0.612,
      "step": 22000
    },
    {
      "epoch": 10.37,
      "eval_cer": 0.1199806313989452,
      "eval_loss": 0.45936959981918335,
      "eval_runtime": 2703.4007,
      "eval_samples_per_second": 6.257,
      "eval_steps_per_second": 0.391,
      "step": 22000
    },
    {
      "epoch": 10.61,
      "learning_rate": 9.073051948051948e-05,
      "loss": 0.6138,
      "step": 22500
    },
    {
      "epoch": 10.85,
      "learning_rate": 8.58603896103896e-05,
      "loss": 0.6128,
      "step": 23000
    },
    {
      "epoch": 11.08,
      "learning_rate": 8.099025974025973e-05,
      "loss": 0.6003,
      "step": 23500
    },
    {
      "epoch": 11.32,
      "learning_rate": 7.612987012987012e-05,
      "loss": 0.5855,
      "step": 24000
    },
    {
      "epoch": 11.32,
      "eval_cer": 0.1176489833665595,
      "eval_loss": 0.4468931555747986,
      "eval_runtime": 2737.3716,
      "eval_samples_per_second": 6.179,
      "eval_steps_per_second": 0.387,
      "step": 24000
    },
    {
      "epoch": 11.55,
      "learning_rate": 7.125974025974026e-05,
      "loss": 0.5766,
      "step": 24500
    },
    {
      "epoch": 11.79,
      "learning_rate": 6.638961038961039e-05,
      "loss": 0.5821,
      "step": 25000
    },
    {
      "epoch": 12.03,
      "learning_rate": 6.151948051948051e-05,
      "loss": 0.5765,
      "step": 25500
    },
    {
      "epoch": 12.26,
      "learning_rate": 5.664935064935064e-05,
      "loss": 0.5538,
      "step": 26000
    },
    {
      "epoch": 12.26,
      "eval_cer": 0.11556380514489371,
      "eval_loss": 0.4398212134838104,
      "eval_runtime": 2705.0289,
      "eval_samples_per_second": 6.253,
      "eval_steps_per_second": 0.391,
      "step": 26000
    },
    {
      "epoch": 12.5,
      "learning_rate": 5.178896103896104e-05,
      "loss": 0.5612,
      "step": 26500
    },
    {
      "epoch": 12.73,
      "learning_rate": 4.691883116883116e-05,
      "loss": 0.5511,
      "step": 27000
    },
    {
      "epoch": 12.97,
      "learning_rate": 4.2048701298701296e-05,
      "loss": 0.5497,
      "step": 27500
    },
    {
      "epoch": 13.2,
      "learning_rate": 3.717857142857143e-05,
      "loss": 0.5341,
      "step": 28000
    },
    {
      "epoch": 13.2,
      "eval_cer": 0.11241640747348813,
      "eval_loss": 0.43176981806755066,
      "eval_runtime": 2707.434,
      "eval_samples_per_second": 6.247,
      "eval_steps_per_second": 0.391,
      "step": 28000
    }
  ],
  "logging_steps": 500,
  "max_steps": 31800,
  "num_train_epochs": 15,
  "save_steps": 2000,
  "total_flos": 1.8803847061079222e+20,
  "trial_name": null,
  "trial_params": null
}