|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 300, |
|
"global_step": 16425, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 3.293024778366089, |
|
"eval_runtime": 329.2436, |
|
"eval_samples_per_second": 33.516, |
|
"eval_steps_per_second": 1.048, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0007389237223065434, |
|
"loss": 5.6462, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 3.415907859802246, |
|
"eval_runtime": 328.5335, |
|
"eval_samples_per_second": 33.589, |
|
"eval_steps_per_second": 1.05, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 3.4422173500061035, |
|
"eval_runtime": 338.1217, |
|
"eval_samples_per_second": 32.636, |
|
"eval_steps_per_second": 1.02, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0014838065068897525, |
|
"loss": 3.3522, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 3.3719358444213867, |
|
"eval_runtime": 340.7912, |
|
"eval_samples_per_second": 32.381, |
|
"eval_steps_per_second": 1.012, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.002228689291472962, |
|
"loss": 3.2605, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 3.4025769233703613, |
|
"eval_runtime": 335.1987, |
|
"eval_samples_per_second": 32.921, |
|
"eval_steps_per_second": 1.029, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 3.444835662841797, |
|
"eval_runtime": 332.4199, |
|
"eval_samples_per_second": 33.196, |
|
"eval_steps_per_second": 1.038, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0029735720760561708, |
|
"loss": 3.2766, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 3.473637104034424, |
|
"eval_runtime": 334.5697, |
|
"eval_samples_per_second": 32.983, |
|
"eval_steps_per_second": 1.031, |
|
"eval_wer": 0.9999093792478477, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 3.982806444168091, |
|
"eval_runtime": 342.0917, |
|
"eval_samples_per_second": 32.257, |
|
"eval_steps_per_second": 1.009, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0037184548606393796, |
|
"loss": 3.2853, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 3.553187370300293, |
|
"eval_runtime": 329.7168, |
|
"eval_samples_per_second": 33.468, |
|
"eval_steps_per_second": 1.046, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.004460358114084256, |
|
"loss": 3.3389, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 3.781858444213867, |
|
"eval_runtime": 323.3731, |
|
"eval_samples_per_second": 34.125, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 3.2249505519866943, |
|
"eval_runtime": 323.988, |
|
"eval_samples_per_second": 34.06, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.005205240898667465, |
|
"loss": 3.2186, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_loss": 3.2372846603393555, |
|
"eval_runtime": 324.034, |
|
"eval_samples_per_second": 34.055, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 3.2161905765533447, |
|
"eval_runtime": 323.7754, |
|
"eval_samples_per_second": 34.082, |
|
"eval_steps_per_second": 1.066, |
|
"eval_wer": 1.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.005950123683250674, |
|
"loss": 3.1916, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 3.2367777824401855, |
|
"eval_runtime": 323.3817, |
|
"eval_samples_per_second": 34.124, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0066950064678338835, |
|
"loss": 3.2188, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 3.2376551628112793, |
|
"eval_runtime": 323.9231, |
|
"eval_samples_per_second": 34.067, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 3.4206786155700684, |
|
"eval_runtime": 324.1643, |
|
"eval_samples_per_second": 34.041, |
|
"eval_steps_per_second": 1.064, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 5.3067, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_loss": NaN, |
|
"eval_runtime": 324.7836, |
|
"eval_samples_per_second": 33.976, |
|
"eval_steps_per_second": 1.062, |
|
"eval_wer": 1.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.844, |
|
"eval_samples_per_second": 34.075, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.0895, |
|
"eval_samples_per_second": 34.155, |
|
"eval_steps_per_second": 1.068, |
|
"eval_wer": 1.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.9944, |
|
"eval_samples_per_second": 34.059, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": NaN, |
|
"eval_runtime": 324.3089, |
|
"eval_samples_per_second": 34.026, |
|
"eval_steps_per_second": 1.064, |
|
"eval_wer": 1.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.6949, |
|
"eval_samples_per_second": 34.091, |
|
"eval_steps_per_second": 1.066, |
|
"eval_wer": 1.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.4351, |
|
"eval_samples_per_second": 34.118, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.8786, |
|
"eval_samples_per_second": 34.071, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.8341, |
|
"eval_samples_per_second": 34.076, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.3556, |
|
"eval_samples_per_second": 34.127, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.4333, |
|
"eval_samples_per_second": 34.118, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.7546, |
|
"eval_samples_per_second": 34.084, |
|
"eval_steps_per_second": 1.066, |
|
"eval_wer": 1.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.2121, |
|
"eval_samples_per_second": 34.142, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": NaN, |
|
"eval_runtime": 322.4572, |
|
"eval_samples_per_second": 34.222, |
|
"eval_steps_per_second": 1.07, |
|
"eval_wer": 1.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": NaN, |
|
"eval_runtime": 318.5677, |
|
"eval_samples_per_second": 34.639, |
|
"eval_steps_per_second": 1.083, |
|
"eval_wer": 1.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"eval_loss": NaN, |
|
"eval_runtime": 319.654, |
|
"eval_samples_per_second": 34.522, |
|
"eval_steps_per_second": 1.079, |
|
"eval_wer": 1.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"eval_loss": NaN, |
|
"eval_runtime": 320.9193, |
|
"eval_samples_per_second": 34.386, |
|
"eval_steps_per_second": 1.075, |
|
"eval_wer": 1.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"eval_loss": NaN, |
|
"eval_runtime": 322.8537, |
|
"eval_samples_per_second": 34.18, |
|
"eval_steps_per_second": 1.069, |
|
"eval_wer": 1.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"eval_loss": NaN, |
|
"eval_runtime": 321.9757, |
|
"eval_samples_per_second": 34.273, |
|
"eval_steps_per_second": 1.072, |
|
"eval_wer": 1.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.2439, |
|
"eval_samples_per_second": 34.138, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 1.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.9537, |
|
"eval_samples_per_second": 34.064, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.8438, |
|
"eval_samples_per_second": 34.075, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.9188, |
|
"eval_samples_per_second": 34.067, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.8826, |
|
"eval_samples_per_second": 34.071, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"eval_loss": NaN, |
|
"eval_runtime": 325.2969, |
|
"eval_samples_per_second": 33.923, |
|
"eval_steps_per_second": 1.061, |
|
"eval_wer": 1.0, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"eval_loss": NaN, |
|
"eval_runtime": 324.7333, |
|
"eval_samples_per_second": 33.982, |
|
"eval_steps_per_second": 1.062, |
|
"eval_wer": 1.0, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"eval_loss": NaN, |
|
"eval_runtime": 324.3204, |
|
"eval_samples_per_second": 34.025, |
|
"eval_steps_per_second": 1.064, |
|
"eval_wer": 1.0, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"eval_loss": NaN, |
|
"eval_runtime": 324.8823, |
|
"eval_samples_per_second": 33.966, |
|
"eval_steps_per_second": 1.062, |
|
"eval_wer": 1.0, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.9396, |
|
"eval_samples_per_second": 34.065, |
|
"eval_steps_per_second": 1.065, |
|
"eval_wer": 1.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_loss": NaN, |
|
"eval_runtime": 326.2533, |
|
"eval_samples_per_second": 33.823, |
|
"eval_steps_per_second": 1.057, |
|
"eval_wer": 1.0, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.7094, |
|
"eval_samples_per_second": 34.089, |
|
"eval_steps_per_second": 1.066, |
|
"eval_wer": 1.0, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"eval_loss": NaN, |
|
"eval_runtime": 325.3499, |
|
"eval_samples_per_second": 33.917, |
|
"eval_steps_per_second": 1.06, |
|
"eval_wer": 1.0, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"eval_loss": NaN, |
|
"eval_runtime": 325.7039, |
|
"eval_samples_per_second": 33.88, |
|
"eval_steps_per_second": 1.059, |
|
"eval_wer": 1.0, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"eval_loss": NaN, |
|
"eval_runtime": 323.516, |
|
"eval_samples_per_second": 34.11, |
|
"eval_steps_per_second": 1.066, |
|
"eval_wer": 1.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_loss": NaN, |
|
"eval_runtime": 471.9655, |
|
"eval_samples_per_second": 23.381, |
|
"eval_steps_per_second": 0.731, |
|
"eval_wer": 1.0, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"eval_loss": NaN, |
|
"eval_runtime": 324.2368, |
|
"eval_samples_per_second": 34.034, |
|
"eval_steps_per_second": 1.064, |
|
"eval_wer": 1.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"eval_loss": NaN, |
|
"eval_runtime": 325.6971, |
|
"eval_samples_per_second": 33.881, |
|
"eval_steps_per_second": 1.059, |
|
"eval_wer": 1.0, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 0.007152364497567974, |
|
"loss": 0.0, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"eval_loss": NaN, |
|
"eval_runtime": 325.4219, |
|
"eval_samples_per_second": 33.91, |
|
"eval_steps_per_second": 1.06, |
|
"eval_wer": 1.0, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 16425, |
|
"total_flos": 6.442470243808035e+19, |
|
"train_loss": 1.129231213434646, |
|
"train_runtime": 45647.7738, |
|
"train_samples_per_second": 14.392, |
|
"train_steps_per_second": 0.36 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16425, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 400, |
|
"total_flos": 6.442470243808035e+19, |
|
"train_batch_size": 20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|