|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 200.0, |
|
"global_step": 10400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.000148, |
|
"loss": 5.649, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_loss": 3.0038444995880127, |
|
"eval_runtime": 14.1536, |
|
"eval_samples_per_second": 20.772, |
|
"eval_steps_per_second": 0.707, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 0.00029800000000000003, |
|
"loss": 2.9978, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 0.00039600000000000003, |
|
"loss": 1.6272, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"eval_loss": 0.7362223267555237, |
|
"eval_runtime": 13.8179, |
|
"eval_samples_per_second": 21.277, |
|
"eval_steps_per_second": 0.724, |
|
"eval_wer": 0.781941309255079, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 0.0003835, |
|
"loss": 1.2822, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.000371, |
|
"loss": 1.1354, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"eval_loss": 0.6409761309623718, |
|
"eval_runtime": 13.8178, |
|
"eval_samples_per_second": 21.277, |
|
"eval_steps_per_second": 0.724, |
|
"eval_wer": 0.7110609480812641, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 34.62, |
|
"learning_rate": 0.00035850000000000004, |
|
"loss": 1.0424, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_loss": 0.6907294392585754, |
|
"eval_runtime": 13.9619, |
|
"eval_samples_per_second": 21.057, |
|
"eval_steps_per_second": 0.716, |
|
"eval_wer": 0.7431151241534989, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 40.38, |
|
"learning_rate": 0.000346, |
|
"loss": 0.9872, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 0.00033350000000000003, |
|
"loss": 0.9293, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"eval_loss": 0.7248561978340149, |
|
"eval_runtime": 14.1849, |
|
"eval_samples_per_second": 20.726, |
|
"eval_steps_per_second": 0.705, |
|
"eval_wer": 0.7101580135440181, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 51.92, |
|
"learning_rate": 0.000321, |
|
"loss": 0.8747, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0003085, |
|
"loss": 0.8246, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"eval_loss": 0.7421836853027344, |
|
"eval_runtime": 14.4192, |
|
"eval_samples_per_second": 20.39, |
|
"eval_steps_per_second": 0.694, |
|
"eval_wer": 0.6966139954853273, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 63.46, |
|
"learning_rate": 0.000296, |
|
"loss": 0.7837, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 67.31, |
|
"eval_loss": 0.7412946820259094, |
|
"eval_runtime": 14.1823, |
|
"eval_samples_per_second": 20.73, |
|
"eval_steps_per_second": 0.705, |
|
"eval_wer": 0.6812641083521445, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 69.23, |
|
"learning_rate": 0.0002835, |
|
"loss": 0.7527, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 0.00027100000000000003, |
|
"loss": 0.7147, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_loss": 0.7873469591140747, |
|
"eval_runtime": 13.9067, |
|
"eval_samples_per_second": 21.141, |
|
"eval_steps_per_second": 0.719, |
|
"eval_wer": 0.6930022573363431, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 80.77, |
|
"learning_rate": 0.0002585, |
|
"loss": 0.6779, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 86.54, |
|
"learning_rate": 0.000246, |
|
"loss": 0.6276, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 86.54, |
|
"eval_loss": 0.8037810921669006, |
|
"eval_runtime": 14.0837, |
|
"eval_samples_per_second": 20.875, |
|
"eval_steps_per_second": 0.71, |
|
"eval_wer": 0.6677200902934537, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"learning_rate": 0.0002335, |
|
"loss": 0.6041, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 96.15, |
|
"eval_loss": 0.8240488767623901, |
|
"eval_runtime": 13.9338, |
|
"eval_samples_per_second": 21.1, |
|
"eval_steps_per_second": 0.718, |
|
"eval_wer": 0.6830699774266366, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 98.08, |
|
"learning_rate": 0.000221, |
|
"loss": 0.5588, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 103.85, |
|
"learning_rate": 0.0002085, |
|
"loss": 0.5336, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 105.77, |
|
"eval_loss": 0.8747946619987488, |
|
"eval_runtime": 13.9807, |
|
"eval_samples_per_second": 21.029, |
|
"eval_steps_per_second": 0.715, |
|
"eval_wer": 0.6749435665914221, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 109.62, |
|
"learning_rate": 0.000196, |
|
"loss": 0.5, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"learning_rate": 0.00018350000000000002, |
|
"loss": 0.4705, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"eval_loss": 0.9005643129348755, |
|
"eval_runtime": 13.867, |
|
"eval_samples_per_second": 21.201, |
|
"eval_steps_per_second": 0.721, |
|
"eval_wer": 0.6496613995485327, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 121.15, |
|
"learning_rate": 0.00017104166666666667, |
|
"loss": 0.43, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 0.8953593969345093, |
|
"eval_runtime": 13.9621, |
|
"eval_samples_per_second": 21.057, |
|
"eval_steps_per_second": 0.716, |
|
"eval_wer": 0.655079006772009, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 126.92, |
|
"learning_rate": 0.00015854166666666667, |
|
"loss": 0.4068, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 132.69, |
|
"learning_rate": 0.0001460416666666667, |
|
"loss": 0.3859, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 134.62, |
|
"eval_loss": 0.9073536396026611, |
|
"eval_runtime": 13.9518, |
|
"eval_samples_per_second": 21.072, |
|
"eval_steps_per_second": 0.717, |
|
"eval_wer": 0.6613995485327314, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 138.46, |
|
"learning_rate": 0.00013354166666666668, |
|
"loss": 0.3622, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 144.23, |
|
"learning_rate": 0.00012104166666666668, |
|
"loss": 0.3342, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 144.23, |
|
"eval_loss": 0.9693499803543091, |
|
"eval_runtime": 13.8467, |
|
"eval_samples_per_second": 21.233, |
|
"eval_steps_per_second": 0.722, |
|
"eval_wer": 0.6559819413092551, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 0.00010854166666666667, |
|
"loss": 0.3155, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_loss": 1.0072590112686157, |
|
"eval_runtime": 13.7449, |
|
"eval_samples_per_second": 21.39, |
|
"eval_steps_per_second": 0.728, |
|
"eval_wer": 0.6690744920993228, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 155.77, |
|
"learning_rate": 9.604166666666668e-05, |
|
"loss": 0.2894, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 161.54, |
|
"learning_rate": 8.358333333333334e-05, |
|
"loss": 0.2673, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 163.46, |
|
"eval_loss": 1.0170269012451172, |
|
"eval_runtime": 14.0595, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 0.711, |
|
"eval_wer": 0.6632054176072235, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 167.31, |
|
"learning_rate": 7.108333333333333e-05, |
|
"loss": 0.2517, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 173.08, |
|
"learning_rate": 5.858333333333333e-05, |
|
"loss": 0.2409, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 173.08, |
|
"eval_loss": 1.0304286479949951, |
|
"eval_runtime": 13.8942, |
|
"eval_samples_per_second": 21.16, |
|
"eval_steps_per_second": 0.72, |
|
"eval_wer": 0.6708803611738149, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 178.85, |
|
"learning_rate": 4.608333333333333e-05, |
|
"loss": 0.2189, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 182.69, |
|
"eval_loss": 0.9965260624885559, |
|
"eval_runtime": 14.2442, |
|
"eval_samples_per_second": 20.64, |
|
"eval_steps_per_second": 0.702, |
|
"eval_wer": 0.654627539503386, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 184.62, |
|
"learning_rate": 3.3625000000000004e-05, |
|
"loss": 0.203, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 190.38, |
|
"learning_rate": 2.1125000000000002e-05, |
|
"loss": 0.1973, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"eval_loss": 1.0360474586486816, |
|
"eval_runtime": 14.1087, |
|
"eval_samples_per_second": 20.838, |
|
"eval_steps_per_second": 0.709, |
|
"eval_wer": 0.655079006772009, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 196.15, |
|
"learning_rate": 8.625e-06, |
|
"loss": 0.1881, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"step": 10400, |
|
"total_flos": 3.0179570579437056e+19, |
|
"train_loss": 0.8014375554598295, |
|
"train_runtime": 12807.5215, |
|
"train_samples_per_second": 12.93, |
|
"train_steps_per_second": 0.812 |
|
} |
|
], |
|
"max_steps": 10400, |
|
"num_train_epochs": 200, |
|
"total_flos": 3.0179570579437056e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|