|
{ |
|
"best_metric": 0.8215018908698001, |
|
"best_model_checkpoint": "/content/wav2vec2-large-xlsr-bn/checkpoint-800", |
|
"epoch": 30.0, |
|
"global_step": 18750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00029405766150560596, |
|
"loss": 4.1339, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 3.2776501178741455, |
|
"eval_runtime": 74.0715, |
|
"eval_samples_per_second": 13.5, |
|
"eval_steps_per_second": 1.688, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00028774693005872927, |
|
"loss": 2.0718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.979073703289032, |
|
"eval_runtime": 74.5153, |
|
"eval_samples_per_second": 13.42, |
|
"eval_steps_per_second": 1.678, |
|
"eval_wer": 0.8215018908698001, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00028143619861185264, |
|
"loss": 2.4619, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 3.080274820327759, |
|
"eval_runtime": 74.0712, |
|
"eval_samples_per_second": 13.501, |
|
"eval_steps_per_second": 1.688, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.000275157501334757, |
|
"loss": 3.1703, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 3.4948482513427734, |
|
"eval_runtime": 74.092, |
|
"eval_samples_per_second": 13.497, |
|
"eval_steps_per_second": 1.687, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00026886278697277094, |
|
"loss": 5.5212, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 74.4322, |
|
"eval_samples_per_second": 13.435, |
|
"eval_steps_per_second": 1.679, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0002625680726107848, |
|
"loss": 5.8864, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 74.0924, |
|
"eval_samples_per_second": 13.497, |
|
"eval_steps_per_second": 1.687, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00025628937533368924, |
|
"loss": 5.9244, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 71.9212, |
|
"eval_samples_per_second": 13.904, |
|
"eval_steps_per_second": 1.738, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00024996262680192206, |
|
"loss": 5.8882, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.014, |
|
"eval_samples_per_second": 13.886, |
|
"eval_steps_per_second": 1.736, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00024365189535504537, |
|
"loss": 5.9011, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.5441, |
|
"eval_samples_per_second": 13.785, |
|
"eval_steps_per_second": 1.723, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00023734116390816868, |
|
"loss": 5.8956, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.2548, |
|
"eval_samples_per_second": 13.84, |
|
"eval_steps_per_second": 1.73, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00023103043246129203, |
|
"loss": 5.9103, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.8707, |
|
"eval_samples_per_second": 13.723, |
|
"eval_steps_per_second": 1.715, |
|
"eval_wer": 1.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.0002247357180993059, |
|
"loss": 5.9083, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.091, |
|
"eval_samples_per_second": 13.871, |
|
"eval_steps_per_second": 1.734, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0002184410037373198, |
|
"loss": 5.9054, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 74.6941, |
|
"eval_samples_per_second": 13.388, |
|
"eval_steps_per_second": 1.673, |
|
"eval_wer": 1.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.00021213027229044314, |
|
"loss": 5.8996, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.5093, |
|
"eval_samples_per_second": 13.791, |
|
"eval_steps_per_second": 1.724, |
|
"eval_wer": 1.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.00020581954084356645, |
|
"loss": 5.8929, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 73.1997, |
|
"eval_samples_per_second": 13.661, |
|
"eval_steps_per_second": 1.708, |
|
"eval_wer": 1.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 0.0001995408435664709, |
|
"loss": 5.9021, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.6358, |
|
"eval_samples_per_second": 13.767, |
|
"eval_steps_per_second": 1.721, |
|
"eval_wer": 1.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.00019323011211959423, |
|
"loss": 5.8853, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.5502, |
|
"eval_samples_per_second": 13.784, |
|
"eval_steps_per_second": 1.723, |
|
"eval_wer": 1.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.000186903363587827, |
|
"loss": 5.8968, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.2966, |
|
"eval_samples_per_second": 13.832, |
|
"eval_steps_per_second": 1.729, |
|
"eval_wer": 1.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.00018060864922584088, |
|
"loss": 5.9139, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.4188, |
|
"eval_samples_per_second": 13.809, |
|
"eval_steps_per_second": 1.726, |
|
"eval_wer": 1.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.00017431393486385476, |
|
"loss": 5.8976, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.6699, |
|
"eval_samples_per_second": 13.761, |
|
"eval_steps_per_second": 1.72, |
|
"eval_wer": 1.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 0.00016798718633208754, |
|
"loss": 5.8909, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 6.071021556854248, |
|
"eval_runtime": 72.1967, |
|
"eval_samples_per_second": 13.851, |
|
"eval_steps_per_second": 1.731, |
|
"eval_wer": 1.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 0.00016202883075280296, |
|
"loss": 7.2473, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"eval_loss": NaN, |
|
"eval_runtime": 74.9323, |
|
"eval_samples_per_second": 13.345, |
|
"eval_steps_per_second": 1.668, |
|
"eval_wer": 1.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.000155621996796583, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"eval_loss": NaN, |
|
"eval_runtime": 72.0418, |
|
"eval_samples_per_second": 13.881, |
|
"eval_steps_per_second": 1.735, |
|
"eval_wer": 1.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 0.00014921516284036303, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"eval_loss": NaN, |
|
"eval_runtime": 72.991, |
|
"eval_samples_per_second": 13.7, |
|
"eval_steps_per_second": 1.713, |
|
"eval_wer": 1.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.00014280832888414308, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 74.3256, |
|
"eval_samples_per_second": 13.454, |
|
"eval_steps_per_second": 1.682, |
|
"eval_wer": 1.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 0.0001364014949279231, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"eval_loss": NaN, |
|
"eval_runtime": 85.1556, |
|
"eval_samples_per_second": 11.743, |
|
"eval_steps_per_second": 1.468, |
|
"eval_wer": 1.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 0.00012999466097170315, |
|
"loss": 0.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"eval_loss": NaN, |
|
"eval_runtime": 74.297, |
|
"eval_samples_per_second": 13.46, |
|
"eval_steps_per_second": 1.682, |
|
"eval_wer": 1.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 0.00012358782701548318, |
|
"loss": 0.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": NaN, |
|
"eval_runtime": 71.8705, |
|
"eval_samples_per_second": 13.914, |
|
"eval_steps_per_second": 1.739, |
|
"eval_wer": 1.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.00011718099305926321, |
|
"loss": 0.0, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"eval_loss": NaN, |
|
"eval_runtime": 72.0682, |
|
"eval_samples_per_second": 13.876, |
|
"eval_steps_per_second": 1.734, |
|
"eval_wer": 1.0, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.00011077415910304323, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": NaN, |
|
"eval_runtime": 86.6422, |
|
"eval_samples_per_second": 11.542, |
|
"eval_steps_per_second": 1.443, |
|
"eval_wer": 1.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 0.00010436732514682327, |
|
"loss": 0.0, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"eval_loss": NaN, |
|
"eval_runtime": 74.8452, |
|
"eval_samples_per_second": 13.361, |
|
"eval_steps_per_second": 1.67, |
|
"eval_wer": 1.0, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 9.79604911906033e-05, |
|
"loss": 0.0, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"eval_loss": NaN, |
|
"eval_runtime": 73.8255, |
|
"eval_samples_per_second": 13.545, |
|
"eval_steps_per_second": 1.693, |
|
"eval_wer": 1.0, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"learning_rate": 9.155365723438334e-05, |
|
"loss": 0.0, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"eval_loss": NaN, |
|
"eval_runtime": 77.5777, |
|
"eval_samples_per_second": 12.89, |
|
"eval_steps_per_second": 1.611, |
|
"eval_wer": 1.0, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 8.514682327816338e-05, |
|
"loss": 0.0, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"eval_loss": NaN, |
|
"eval_runtime": 78.2086, |
|
"eval_samples_per_second": 12.786, |
|
"eval_steps_per_second": 1.598, |
|
"eval_wer": 1.0, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 7.87399893219434e-05, |
|
"loss": 0.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_loss": NaN, |
|
"eval_runtime": 81.9911, |
|
"eval_samples_per_second": 12.196, |
|
"eval_steps_per_second": 1.525, |
|
"eval_wer": 1.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"learning_rate": 7.233315536572343e-05, |
|
"loss": 0.0, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"eval_loss": NaN, |
|
"eval_runtime": 74.0428, |
|
"eval_samples_per_second": 13.506, |
|
"eval_steps_per_second": 1.688, |
|
"eval_wer": 1.0, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 6.592632140950347e-05, |
|
"loss": 0.0, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"eval_loss": NaN, |
|
"eval_runtime": 73.6487, |
|
"eval_samples_per_second": 13.578, |
|
"eval_steps_per_second": 1.697, |
|
"eval_wer": 1.0, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 5.95194874532835e-05, |
|
"loss": 0.0, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"eval_loss": NaN, |
|
"eval_runtime": 73.0072, |
|
"eval_samples_per_second": 13.697, |
|
"eval_steps_per_second": 1.712, |
|
"eval_wer": 1.0, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"learning_rate": 5.3112653497063527e-05, |
|
"loss": 0.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"eval_loss": NaN, |
|
"eval_runtime": 79.0121, |
|
"eval_samples_per_second": 12.656, |
|
"eval_steps_per_second": 1.582, |
|
"eval_wer": 1.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 4.670581954084356e-05, |
|
"loss": 0.0, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_loss": NaN, |
|
"eval_runtime": 82.7961, |
|
"eval_samples_per_second": 12.078, |
|
"eval_steps_per_second": 1.51, |
|
"eval_wer": 1.0, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"learning_rate": 4.02989855846236e-05, |
|
"loss": 0.0, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"eval_loss": NaN, |
|
"eval_runtime": 83.3819, |
|
"eval_samples_per_second": 11.993, |
|
"eval_steps_per_second": 1.499, |
|
"eval_wer": 1.0, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 3.3892151628403626e-05, |
|
"loss": 0.0, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"eval_loss": NaN, |
|
"eval_runtime": 79.5109, |
|
"eval_samples_per_second": 12.577, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wer": 1.0, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 27.52, |
|
"learning_rate": 2.748531767218366e-05, |
|
"loss": 0.0, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 27.52, |
|
"eval_loss": NaN, |
|
"eval_runtime": 76.3506, |
|
"eval_samples_per_second": 13.097, |
|
"eval_steps_per_second": 1.637, |
|
"eval_wer": 1.0, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"learning_rate": 2.107848371596369e-05, |
|
"loss": 0.0, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"eval_loss": NaN, |
|
"eval_runtime": 72.6042, |
|
"eval_samples_per_second": 13.773, |
|
"eval_steps_per_second": 1.722, |
|
"eval_wer": 1.0, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 1.4671649759743725e-05, |
|
"loss": 0.0, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_loss": NaN, |
|
"eval_runtime": 83.2188, |
|
"eval_samples_per_second": 12.017, |
|
"eval_steps_per_second": 1.502, |
|
"eval_wer": 1.0, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"learning_rate": 8.264815803523758e-06, |
|
"loss": 0.0, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"eval_loss": NaN, |
|
"eval_runtime": 76.8024, |
|
"eval_samples_per_second": 13.02, |
|
"eval_steps_per_second": 1.628, |
|
"eval_wer": 1.0, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 18750, |
|
"total_flos": 1.1136444297962675e+20, |
|
"train_loss": 2.53877658203125, |
|
"train_runtime": 56941.5798, |
|
"train_samples_per_second": 10.537, |
|
"train_steps_per_second": 0.329 |
|
} |
|
], |
|
"max_steps": 18750, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.1136444297962675e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|