{
  "best_metric": 0.2463931441307068,
  "best_model_checkpoint": "./checkpoint-6000",
  "epoch": 4.999878498238225,
  "global_step": 15430,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 3.7125e-06,
      "loss": 7.2804,
      "step": 100
    },
    {
      "epoch": 0.06,
      "learning_rate": 7.4625e-06,
      "loss": 2.9939,
      "step": 200
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.1212499999999998e-05,
      "loss": 2.8161,
      "step": 300
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.49625e-05,
      "loss": 1.8175,
      "step": 400
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8675e-05,
      "loss": 1.3167,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 2.2424999999999996e-05,
      "loss": 1.1703,
      "step": 600
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.6174999999999996e-05,
      "loss": 1.1128,
      "step": 700
    },
    {
      "epoch": 0.26,
      "learning_rate": 2.9925e-05,
      "loss": 1.0763,
      "step": 800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.3675e-05,
      "loss": 1.0522,
      "step": 900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.7424999999999995e-05,
      "loss": 1.0326,
      "step": 1000
    },
    {
      "epoch": 0.32,
      "eval_loss": 0.30916452407836914,
      "eval_runtime": 1003.5092,
      "eval_samples_per_second": 15.885,
      "eval_steps_per_second": 0.994,
      "eval_wer": 0.2718170286472251,
      "step": 1000
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.1175e-05,
      "loss": 1.0398,
      "step": 1100
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.4924999999999994e-05,
      "loss": 1.0289,
      "step": 1200
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.8675e-05,
      "loss": 1.0403,
      "step": 1300
    },
    {
      "epoch": 0.45,
      "learning_rate": 5.2424999999999994e-05,
      "loss": 1.0439,
      "step": 1400
    },
    {
      "epoch": 0.49,
      "learning_rate": 5.6175e-05,
      "loss": 1.0455,
      "step": 1500
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.9925e-05,
      "loss": 1.0567,
      "step": 1600
    },
    {
      "epoch": 0.55,
      "learning_rate": 6.36375e-05,
      "loss": 1.0687,
      "step": 1700
    },
    {
      "epoch": 0.58,
      "learning_rate": 6.738749999999999e-05,
      "loss": 1.0601,
      "step": 1800
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.11375e-05,
      "loss": 1.0657,
      "step": 1900
    },
    {
      "epoch": 0.65,
      "learning_rate": 7.48875e-05,
      "loss": 1.0828,
      "step": 2000
    },
    {
      "epoch": 0.65,
      "eval_loss": 0.28432372212409973,
      "eval_runtime": 1004.4407,
      "eval_samples_per_second": 15.871,
      "eval_steps_per_second": 0.993,
      "eval_wer": 0.26055517279125323,
      "step": 2000
    },
    {
      "epoch": 0.68,
      "learning_rate": 7.4463886820551e-05,
      "loss": 1.09,
      "step": 2100
    },
    {
      "epoch": 0.71,
      "learning_rate": 7.390543559195829e-05,
      "loss": 1.0696,
      "step": 2200
    },
    {
      "epoch": 0.75,
      "learning_rate": 7.33469843633656e-05,
      "loss": 1.0794,
      "step": 2300
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.279411764705881e-05,
      "loss": 1.0906,
      "step": 2400
    },
    {
      "epoch": 0.81,
      "learning_rate": 7.223566641846612e-05,
      "loss": 1.0739,
      "step": 2500
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.16772151898734e-05,
      "loss": 1.0753,
      "step": 2600
    },
    {
      "epoch": 0.87,
      "learning_rate": 7.111876396128072e-05,
      "loss": 1.0694,
      "step": 2700
    },
    {
      "epoch": 0.91,
      "learning_rate": 7.0560312732688e-05,
      "loss": 1.0652,
      "step": 2800
    },
    {
      "epoch": 0.94,
      "learning_rate": 7.00018615040953e-05,
      "loss": 1.0627,
      "step": 2900
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.94434102755026e-05,
      "loss": 1.0771,
      "step": 3000
    },
    {
      "epoch": 0.97,
      "eval_loss": 0.27740851044654846,
      "eval_runtime": 1003.9914,
      "eval_samples_per_second": 15.878,
      "eval_steps_per_second": 0.993,
      "eval_wer": 0.24881682903743005,
      "step": 3000
    },
    {
      "epoch": 1.0,
      "learning_rate": 6.88849590469099e-05,
      "loss": 1.0548,
      "step": 3100
    },
    {
      "epoch": 1.04,
      "learning_rate": 6.832650781831719e-05,
      "loss": 1.0594,
      "step": 3200
    },
    {
      "epoch": 1.07,
      "learning_rate": 6.776805658972449e-05,
      "loss": 1.0605,
      "step": 3300
    },
    {
      "epoch": 1.1,
      "learning_rate": 6.720960536113179e-05,
      "loss": 1.0501,
      "step": 3400
    },
    {
      "epoch": 1.13,
      "learning_rate": 6.665673864482501e-05,
      "loss": 1.06,
      "step": 3500
    },
    {
      "epoch": 1.17,
      "learning_rate": 6.609828741623231e-05,
      "loss": 1.0539,
      "step": 3600
    },
    {
      "epoch": 1.2,
      "learning_rate": 6.553983618763961e-05,
      "loss": 1.0483,
      "step": 3700
    },
    {
      "epoch": 1.23,
      "learning_rate": 6.49813849590469e-05,
      "loss": 1.0507,
      "step": 3800
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.44229337304542e-05,
      "loss": 1.0416,
      "step": 3900
    },
    {
      "epoch": 1.3,
      "learning_rate": 6.38644825018615e-05,
      "loss": 1.0306,
      "step": 4000
    },
    {
      "epoch": 1.3,
      "eval_loss": 0.25875675678253174,
      "eval_runtime": 1004.8682,
      "eval_samples_per_second": 15.864,
      "eval_steps_per_second": 0.992,
      "eval_wer": 0.23513389953832187,
      "step": 4000
    },
    {
      "epoch": 1.33,
      "learning_rate": 6.33060312732688e-05,
      "loss": 1.0413,
      "step": 4100
    },
    {
      "epoch": 1.36,
      "learning_rate": 6.275316455696202e-05,
      "loss": 1.0475,
      "step": 4200
    },
    {
      "epoch": 1.39,
      "learning_rate": 6.219471332836931e-05,
      "loss": 1.0328,
      "step": 4300
    },
    {
      "epoch": 1.43,
      "learning_rate": 6.163626209977662e-05,
      "loss": 1.036,
      "step": 4400
    },
    {
      "epoch": 1.46,
      "learning_rate": 6.10778108711839e-05,
      "loss": 1.0248,
      "step": 4500
    },
    {
      "epoch": 1.49,
      "learning_rate": 6.051935964259121e-05,
      "loss": 1.0184,
      "step": 4600
    },
    {
      "epoch": 1.52,
      "learning_rate": 5.996090841399851e-05,
      "loss": 1.0268,
      "step": 4700
    },
    {
      "epoch": 1.56,
      "learning_rate": 5.94024571854058e-05,
      "loss": 1.0177,
      "step": 4800
    },
    {
      "epoch": 1.59,
      "learning_rate": 5.88440059568131e-05,
      "loss": 1.0155,
      "step": 4900
    },
    {
      "epoch": 1.62,
      "learning_rate": 5.82855547282204e-05,
      "loss": 1.0052,
      "step": 5000
    },
    {
      "epoch": 1.62,
      "eval_loss": 0.24826321005821228,
      "eval_runtime": 1000.3482,
      "eval_samples_per_second": 15.935,
      "eval_steps_per_second": 0.997,
      "eval_wer": 0.22835360553241082,
      "step": 5000
    },
    {
      "epoch": 1.65,
      "learning_rate": 5.7727103499627696e-05,
      "loss": 1.0076,
      "step": 5100
    },
    {
      "epoch": 1.68,
      "learning_rate": 5.717423678332092e-05,
      "loss": 1.0107,
      "step": 5200
    },
    {
      "epoch": 1.72,
      "learning_rate": 5.661578555472821e-05,
      "loss": 1.0135,
      "step": 5300
    },
    {
      "epoch": 1.75,
      "learning_rate": 5.605733432613552e-05,
      "loss": 1.012,
      "step": 5400
    },
    {
      "epoch": 1.78,
      "learning_rate": 5.549888309754281e-05,
      "loss": 0.9943,
      "step": 5500
    },
    {
      "epoch": 1.81,
      "learning_rate": 5.4940431868950106e-05,
      "loss": 1.0049,
      "step": 5600
    },
    {
      "epoch": 1.85,
      "learning_rate": 5.4381980640357404e-05,
      "loss": 1.0082,
      "step": 5700
    },
    {
      "epoch": 1.88,
      "learning_rate": 5.38235294117647e-05,
      "loss": 0.9948,
      "step": 5800
    },
    {
      "epoch": 1.91,
      "learning_rate": 5.326507818317199e-05,
      "loss": 0.9925,
      "step": 5900
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.2706626954579296e-05,
      "loss": 0.9865,
      "step": 6000
    },
    {
      "epoch": 1.94,
      "eval_loss": 0.2463931441307068,
      "eval_runtime": 999.3892,
      "eval_samples_per_second": 15.951,
      "eval_steps_per_second": 0.998,
      "eval_wer": 0.22199184819353135,
      "step": 6000
    },
    {
      "epoch": 1.98,
      "learning_rate": 5.2148175725986594e-05,
      "loss": 0.987,
      "step": 6100
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.158972449739389e-05,
      "loss": 0.9789,
      "step": 6200
    },
    {
      "epoch": 2.04,
      "learning_rate": 5.103127326880119e-05,
      "loss": 0.9722,
      "step": 6300
    },
    {
      "epoch": 2.07,
      "learning_rate": 5.047282204020848e-05,
      "loss": 0.9747,
      "step": 6400
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.9914370811615785e-05,
      "loss": 0.9771,
      "step": 6500
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.9355919583023076e-05,
      "loss": 0.9777,
      "step": 6600
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.88030528667163e-05,
      "loss": 0.9804,
      "step": 6700
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.82446016381236e-05,
      "loss": 0.9575,
      "step": 6800
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.76861504095309e-05,
      "loss": 0.9613,
      "step": 6900
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.713328369322412e-05,
      "loss": 0.978,
      "step": 7000
    },
    {
      "epoch": 2.27,
      "eval_loss": 0.2513602077960968,
      "eval_runtime": 1010.6008,
      "eval_samples_per_second": 15.774,
      "eval_steps_per_second": 0.987,
      "eval_wer": 0.21723340824066503,
      "step": 7000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.658041697691735e-05,
      "loss": 0.9837,
      "step": 7100
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.602196574832464e-05,
      "loss": 1.0137,
      "step": 7200
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.546351451973194e-05,
      "loss": 1.0971,
      "step": 7300
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.4905063291139234e-05,
      "loss": 1.1683,
      "step": 7400
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.434661206254654e-05,
      "loss": 1.2993,
      "step": 7500
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.378816083395383e-05,
      "loss": 1.3806,
      "step": 7600
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.322970960536113e-05,
      "loss": 1.4648,
      "step": 7700
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.2671258376768425e-05,
      "loss": 1.5643,
      "step": 7800
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.211280714817572e-05,
      "loss": 1.5596,
      "step": 7900
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.155435591958301e-05,
      "loss": 1.7438,
      "step": 8000
    },
    {
      "epoch": 2.59,
      "eval_loss": 0.7983415722846985,
      "eval_runtime": 1011.5068,
      "eval_samples_per_second": 15.76,
      "eval_steps_per_second": 0.986,
      "eval_wer": 0.5072471233652923,
      "step": 8000
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.099590469099032e-05,
      "loss": 1.7779,
      "step": 8100
    },
    {
      "epoch": 2.66,
      "learning_rate": 4.0437453462397615e-05,
      "loss": 1.9193,
      "step": 8200
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.987900223380491e-05,
      "loss": 1.8549,
      "step": 8300
    },
    {
      "epoch": 2.72,
      "learning_rate": 3.932055100521221e-05,
      "loss": 1.9323,
      "step": 8400
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.87620997766195e-05,
      "loss": 2.0832,
      "step": 8500
    },
    {
      "epoch": 2.79,
      "learning_rate": 3.8203648548026806e-05,
      "loss": 2.268,
      "step": 8600
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.76451973194341e-05,
      "loss": 1.9962,
      "step": 8700
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.7086746090841395e-05,
      "loss": 2.2105,
      "step": 8800
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.65282948622487e-05,
      "loss": 2.3688,
      "step": 8900
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.596984363365599e-05,
      "loss": 2.3309,
      "step": 9000
    },
    {
      "epoch": 2.92,
      "eval_loss": 1.8916891813278198,
      "eval_runtime": 1006.0568,
      "eval_samples_per_second": 15.845,
      "eval_steps_per_second": 0.991,
      "eval_wer": 0.9416173544619229,
      "step": 9000
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.541139240506329e-05,
      "loss": 2.2688,
      "step": 9100
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.4852941176470585e-05,
      "loss": 2.0991,
      "step": 9200
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.430007446016381e-05,
      "loss": 2.0663,
      "step": 9300
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.374162323157111e-05,
      "loss": 2.0995,
      "step": 9400
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.3183172002978406e-05,
      "loss": 2.2691,
      "step": 9500
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.26247207743857e-05,
      "loss": 2.1691,
      "step": 9600
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.2066269545792995e-05,
      "loss": 2.2706,
      "step": 9700
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.150781831720029e-05,
      "loss": 2.0075,
      "step": 9800
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.094936708860759e-05,
      "loss": 2.1615,
      "step": 9900
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.0390915860014888e-05,
      "loss": 2.1834,
      "step": 10000
    },
    {
      "epoch": 3.24,
      "eval_loss": 1.7495689392089844,
      "eval_runtime": 1013.0296,
      "eval_samples_per_second": 15.736,
      "eval_steps_per_second": 0.984,
      "eval_wer": 0.9029832005820879,
      "step": 10000
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.983246463142219e-05,
      "loss": 2.2615,
      "step": 10100
    },
    {
      "epoch": 3.31,
      "learning_rate": 2.9274013402829486e-05,
      "loss": 2.387,
      "step": 10200
    },
    {
      "epoch": 3.34,
      "learning_rate": 2.871556217423678e-05,
      "loss": 2.3751,
      "step": 10300
    },
    {
      "epoch": 3.37,
      "learning_rate": 2.8157110945644078e-05,
      "loss": 2.3223,
      "step": 10400
    },
    {
      "epoch": 3.4,
      "learning_rate": 2.7604244229337304e-05,
      "loss": 2.2708,
      "step": 10500
    },
    {
      "epoch": 3.43,
      "learning_rate": 2.7045793000744598e-05,
      "loss": 2.2369,
      "step": 10600
    },
    {
      "epoch": 3.47,
      "learning_rate": 2.6487341772151896e-05,
      "loss": 2.3768,
      "step": 10700
    },
    {
      "epoch": 3.5,
      "learning_rate": 2.5928890543559194e-05,
      "loss": 2.2929,
      "step": 10800
    },
    {
      "epoch": 3.53,
      "learning_rate": 2.5376023827252416e-05,
      "loss": 2.3779,
      "step": 10900
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.4817572598659713e-05,
      "loss": 2.3047,
      "step": 11000
    },
    {
      "epoch": 3.56,
      "eval_loss": 1.5376594066619873,
      "eval_runtime": 1009.9366,
      "eval_samples_per_second": 15.784,
      "eval_steps_per_second": 0.987,
      "eval_wer": 0.874715877993342,
      "step": 11000
    },
    {
      "epoch": 3.6,
      "learning_rate": 2.4259121370067014e-05,
      "loss": 2.0797,
      "step": 11100
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.3700670141474312e-05,
      "loss": 2.1414,
      "step": 11200
    },
    {
      "epoch": 3.66,
      "learning_rate": 2.3142218912881606e-05,
      "loss": 2.1064,
      "step": 11300
    },
    {
      "epoch": 3.69,
      "learning_rate": 2.2583767684288904e-05,
      "loss": 2.0681,
      "step": 11400
    },
    {
      "epoch": 3.73,
      "learning_rate": 2.2025316455696202e-05,
      "loss": 2.0853,
      "step": 11500
    },
    {
      "epoch": 3.76,
      "learning_rate": 2.14668652271035e-05,
      "loss": 2.1272,
      "step": 11600
    },
    {
      "epoch": 3.79,
      "learning_rate": 2.0908413998510794e-05,
      "loss": 2.1257,
      "step": 11700
    },
    {
      "epoch": 3.82,
      "learning_rate": 2.034996276991809e-05,
      "loss": 2.0966,
      "step": 11800
    },
    {
      "epoch": 3.86,
      "learning_rate": 1.979151154132539e-05,
      "loss": 2.2297,
      "step": 11900
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.9233060312732687e-05,
      "loss": 2.1378,
      "step": 12000
    },
    {
      "epoch": 3.89,
      "eval_loss": 1.3500523567199707,
      "eval_runtime": 1013.8007,
      "eval_samples_per_second": 15.724,
      "eval_steps_per_second": 0.983,
      "eval_wer": 0.7923285448445941,
      "step": 12000
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.8680193596425912e-05,
      "loss": 2.2683,
      "step": 12100
    },
    {
      "epoch": 3.95,
      "learning_rate": 1.8127326880119135e-05,
      "loss": 2.311,
      "step": 12200
    },
    {
      "epoch": 3.99,
      "learning_rate": 1.7568875651526432e-05,
      "loss": 2.0594,
      "step": 12300
    },
    {
      "epoch": 4.02,
      "learning_rate": 1.701042442293373e-05,
      "loss": 1.9798,
      "step": 12400
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.6457557706626952e-05,
      "loss": 1.8521,
      "step": 12500
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.589910647803425e-05,
      "loss": 1.8466,
      "step": 12600
    },
    {
      "epoch": 4.12,
      "learning_rate": 1.5340655249441547e-05,
      "loss": 1.9372,
      "step": 12700
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.4782204020848843e-05,
      "loss": 1.9591,
      "step": 12800
    },
    {
      "epoch": 4.18,
      "learning_rate": 1.4223752792256143e-05,
      "loss": 1.9641,
      "step": 12900
    },
    {
      "epoch": 4.21,
      "learning_rate": 1.3670886075949367e-05,
      "loss": 1.9812,
      "step": 13000
    },
    {
      "epoch": 4.21,
      "eval_loss": 1.266192078590393,
      "eval_runtime": 1008.4278,
      "eval_samples_per_second": 15.808,
      "eval_steps_per_second": 0.989,
      "eval_wer": 0.7696760526197176,
      "step": 13000
    },
    {
      "epoch": 4.24,
      "learning_rate": 1.3112434847356663e-05,
      "loss": 2.1185,
      "step": 13100
    },
    {
      "epoch": 4.28,
      "learning_rate": 1.255398361876396e-05,
      "loss": 2.2783,
      "step": 13200
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.2001116902457184e-05,
      "loss": 2.3792,
      "step": 13300
    },
    {
      "epoch": 4.34,
      "learning_rate": 1.1442665673864482e-05,
      "loss": 2.4678,
      "step": 13400
    },
    {
      "epoch": 4.37,
      "learning_rate": 1.0884214445271778e-05,
      "loss": 2.5792,
      "step": 13500
    },
    {
      "epoch": 4.41,
      "learning_rate": 1.0325763216679075e-05,
      "loss": 2.6183,
      "step": 13600
    },
    {
      "epoch": 4.44,
      "learning_rate": 9.767311988086373e-06,
      "loss": 2.6677,
      "step": 13700
    },
    {
      "epoch": 4.47,
      "learning_rate": 9.20886075949367e-06,
      "loss": 2.7043,
      "step": 13800
    },
    {
      "epoch": 4.5,
      "learning_rate": 8.650409530900967e-06,
      "loss": 2.6737,
      "step": 13900
    },
    {
      "epoch": 4.54,
      "learning_rate": 8.091958302308264e-06,
      "loss": 2.6855,
      "step": 14000
    },
    {
      "epoch": 4.54,
      "eval_loss": 2.4120402336120605,
      "eval_runtime": 1002.3228,
      "eval_samples_per_second": 15.904,
      "eval_steps_per_second": 0.995,
      "eval_wer": 0.9901740468632286,
      "step": 14000
    },
    {
      "epoch": 4.57,
      "learning_rate": 7.533507073715562e-06,
      "loss": 2.7223,
      "step": 14100
    },
    {
      "epoch": 4.6,
      "learning_rate": 6.975055845122859e-06,
      "loss": 2.7861,
      "step": 14200
    },
    {
      "epoch": 4.63,
      "learning_rate": 6.416604616530156e-06,
      "loss": 2.7397,
      "step": 14300
    },
    {
      "epoch": 4.67,
      "learning_rate": 5.8581533879374525e-06,
      "loss": 2.7574,
      "step": 14400
    },
    {
      "epoch": 4.7,
      "learning_rate": 5.3052866716306765e-06,
      "loss": 2.8119,
      "step": 14500
    },
    {
      "epoch": 4.73,
      "learning_rate": 4.746835443037975e-06,
      "loss": 2.725,
      "step": 14600
    },
    {
      "epoch": 4.76,
      "learning_rate": 4.188384214445272e-06,
      "loss": 2.7319,
      "step": 14700
    },
    {
      "epoch": 4.8,
      "learning_rate": 3.629932985852568e-06,
      "loss": 2.7426,
      "step": 14800
    },
    {
      "epoch": 4.83,
      "learning_rate": 3.071481757259866e-06,
      "loss": 2.7441,
      "step": 14900
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.513030528667163e-06,
      "loss": 2.7482,
      "step": 15000
    },
    {
      "epoch": 4.86,
      "eval_loss": 2.5340933799743652,
      "eval_runtime": 1000.6978,
      "eval_samples_per_second": 15.93,
      "eval_steps_per_second": 0.996,
      "eval_wer": 0.9874181438864671,
      "step": 15000
    },
    {
      "epoch": 4.89,
      "learning_rate": 1.95457930007446e-06,
      "loss": 2.752,
      "step": 15100
    },
    {
      "epoch": 4.93,
      "learning_rate": 1.3961280714817571e-06,
      "loss": 2.7476,
      "step": 15200
    },
    {
      "epoch": 4.96,
      "learning_rate": 8.376768428890544e-07,
      "loss": 2.7484,
      "step": 15300
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.7922561429635143e-07,
      "loss": 2.7494,
      "step": 15400
    },
    {
      "epoch": 5.0,
      "step": 15430,
      "total_flos": 8.828199178194905e+20,
      "train_loss": 1.7380384819737829,
      "train_runtime": 214447.5226,
      "train_samples_per_second": 9.211,
      "train_steps_per_second": 0.072
    }
  ],
  "max_steps": 15430,
  "num_train_epochs": 5,
  "total_flos": 8.828199178194905e+20,
  "trial_name": null,
  "trial_params": null
}