|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.992926196651734, |
|
"eval_steps": 2000, |
|
"global_step": 63600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014849999999999998, |
|
"loss": 15.6695, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.4987, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00029763258785942486, |
|
"loss": 2.3147, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002952364217252396, |
|
"loss": 1.7721, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_cer": 0.29026945911872865, |
|
"eval_loss": 1.1368173360824585, |
|
"eval_runtime": 2718.6813, |
|
"eval_samples_per_second": 6.221, |
|
"eval_steps_per_second": 0.389, |
|
"eval_wer": 0.6589364856615053, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0002928402555910543, |
|
"loss": 1.5735, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.000290444089456869, |
|
"loss": 1.4611, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00028805271565495203, |
|
"loss": 1.4056, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00028565654952076674, |
|
"loss": 1.3501, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_cer": 0.2240432390931656, |
|
"eval_loss": 0.8561204671859741, |
|
"eval_runtime": 2698.4922, |
|
"eval_samples_per_second": 6.268, |
|
"eval_steps_per_second": 0.392, |
|
"eval_wer": 0.5451323644839807, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00028326038338658146, |
|
"loss": 1.2967, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00028086421725239617, |
|
"loss": 1.244, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0002784728434504792, |
|
"loss": 1.2361, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002760766773162939, |
|
"loss": 1.2133, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_cer": 0.2002927101645895, |
|
"eval_loss": 0.7505359053611755, |
|
"eval_runtime": 2721.6856, |
|
"eval_samples_per_second": 6.215, |
|
"eval_steps_per_second": 0.389, |
|
"eval_wer": 0.49743516558444134, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.0002736805111821086, |
|
"loss": 1.1755, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0002712843450479233, |
|
"loss": 1.1403, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.000268888178913738, |
|
"loss": 1.1253, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00026649680511182103, |
|
"loss": 1.0981, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_cer": 0.18418492651709803, |
|
"eval_loss": 0.6767656803131104, |
|
"eval_runtime": 2740.3957, |
|
"eval_samples_per_second": 6.172, |
|
"eval_steps_per_second": 0.386, |
|
"eval_wer": 0.46858375155594667, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.00026410063897763575, |
|
"loss": 1.0984, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00026170447284345046, |
|
"loss": 1.0614, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00025930830670926517, |
|
"loss": 1.0553, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.0002569169329073482, |
|
"loss": 1.0375, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_cer": 0.17071197058066542, |
|
"eval_loss": 0.64134681224823, |
|
"eval_runtime": 2703.0312, |
|
"eval_samples_per_second": 6.257, |
|
"eval_steps_per_second": 0.391, |
|
"eval_wer": 0.44041417913406117, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0002545207667731629, |
|
"loss": 1.0351, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00025212460063897763, |
|
"loss": 1.0087, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00024973322683706067, |
|
"loss": 0.9934, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.0002473370607028754, |
|
"loss": 0.9927, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_cer": 0.16338767301962598, |
|
"eval_loss": 0.6106029152870178, |
|
"eval_runtime": 2826.5991, |
|
"eval_samples_per_second": 5.984, |
|
"eval_steps_per_second": 0.374, |
|
"eval_wer": 0.42461289632043386, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.0002449408945686901, |
|
"loss": 0.9821, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0002425447284345048, |
|
"loss": 0.9637, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00024014856230031946, |
|
"loss": 0.9497, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00023775239616613414, |
|
"loss": 0.9439, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_cer": 0.1613024947979602, |
|
"eval_loss": 0.5999171733856201, |
|
"eval_runtime": 2741.759, |
|
"eval_samples_per_second": 6.169, |
|
"eval_steps_per_second": 0.386, |
|
"eval_wer": 0.41588373807768236, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00023536102236421723, |
|
"loss": 0.948, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0002329696485623003, |
|
"loss": 0.9367, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.000230573482428115, |
|
"loss": 0.9087, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.0002281773162939297, |
|
"loss": 0.9059, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_cer": 0.15351797484699242, |
|
"eval_loss": 0.5740103721618652, |
|
"eval_runtime": 2702.3695, |
|
"eval_samples_per_second": 6.259, |
|
"eval_steps_per_second": 0.392, |
|
"eval_wer": 0.39852056228147376, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00022578115015974438, |
|
"loss": 0.9075, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.0002233849840255591, |
|
"loss": 0.8999, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.00022098881789137377, |
|
"loss": 0.8597, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.00021859265175718849, |
|
"loss": 0.8772, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_cer": 0.14781427082015555, |
|
"eval_loss": 0.5568912625312805, |
|
"eval_runtime": 2727.4118, |
|
"eval_samples_per_second": 6.201, |
|
"eval_steps_per_second": 0.388, |
|
"eval_wer": 0.3954364182701837, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.00021619648562300317, |
|
"loss": 0.8785, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.00021380511182108623, |
|
"loss": 0.8702, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.00021140894568690095, |
|
"loss": 0.8447, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.00020901277955271563, |
|
"loss": 0.8483, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_cer": 0.14274528108464166, |
|
"eval_loss": 0.5406663417816162, |
|
"eval_runtime": 2824.1098, |
|
"eval_samples_per_second": 5.989, |
|
"eval_steps_per_second": 0.375, |
|
"eval_wer": 0.3784141632772796, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.00020661661341853031, |
|
"loss": 0.8358, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.00020422523961661338, |
|
"loss": 0.8391, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.00020182907348242812, |
|
"loss": 0.8215, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 0.00019943769968051118, |
|
"loss": 0.81, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_cer": 0.1415456492625536, |
|
"eval_loss": 0.5282983779907227, |
|
"eval_runtime": 2742.622, |
|
"eval_samples_per_second": 6.167, |
|
"eval_steps_per_second": 0.386, |
|
"eval_wer": 0.37441032593614476, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 0.00019704153354632587, |
|
"loss": 0.8108, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 0.00019464536741214058, |
|
"loss": 0.8175, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.00019224920127795526, |
|
"loss": 0.8001, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 0.00018985782747603833, |
|
"loss": 0.793, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_cer": 0.13662715879199255, |
|
"eval_loss": 0.517921507358551, |
|
"eval_runtime": 2729.8979, |
|
"eval_samples_per_second": 6.196, |
|
"eval_steps_per_second": 0.388, |
|
"eval_wer": 0.36633129573690426, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 0.000187461661341853, |
|
"loss": 0.7827, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 0.00018506549520766772, |
|
"loss": 0.7899, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 0.0001826693290734824, |
|
"loss": 0.7806, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 0.00018027316293929712, |
|
"loss": 0.7577, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_cer": 0.1359182854425769, |
|
"eval_loss": 0.5058821439743042, |
|
"eval_runtime": 2722.7634, |
|
"eval_samples_per_second": 6.212, |
|
"eval_steps_per_second": 0.389, |
|
"eval_wer": 0.35946530932616605, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.00017788178913738016, |
|
"loss": 0.762, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 0.00017548562300319487, |
|
"loss": 0.7595, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.00017308945686900955, |
|
"loss": 0.7629, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.00017069329073482426, |
|
"loss": 0.7379, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"eval_cer": 0.13330526921919236, |
|
"eval_loss": 0.4969228506088257, |
|
"eval_runtime": 2824.8712, |
|
"eval_samples_per_second": 5.988, |
|
"eval_steps_per_second": 0.375, |
|
"eval_wer": 0.35324945095893884, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 0.00016830191693290736, |
|
"loss": 0.737, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 0.000165905750798722, |
|
"loss": 0.7444, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 0.00016350958466453675, |
|
"loss": 0.7372, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 0.00016111341853035144, |
|
"loss": 0.7328, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_cer": 0.13079476698787718, |
|
"eval_loss": 0.4908413589000702, |
|
"eval_runtime": 2825.8542, |
|
"eval_samples_per_second": 5.985, |
|
"eval_steps_per_second": 0.374, |
|
"eval_wer": 0.3475251528197322, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 0.0001587172523961661, |
|
"loss": 0.7184, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 0.00015632108626198083, |
|
"loss": 0.7216, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.0001539297124600639, |
|
"loss": 0.7238, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 0.00015153354632587858, |
|
"loss": 0.7119, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_cer": 0.12864851660072327, |
|
"eval_loss": 0.4887321889400482, |
|
"eval_runtime": 2731.0336, |
|
"eval_samples_per_second": 6.193, |
|
"eval_steps_per_second": 0.387, |
|
"eval_wer": 0.34784228845071313, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 0.00014914217252396165, |
|
"loss": 0.7124, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 0.00014674600638977636, |
|
"loss": 0.7294, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 0.00014434984025559104, |
|
"loss": 0.7545, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.00014195367412140575, |
|
"loss": 0.7572, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"eval_cer": 0.13271854020075294, |
|
"eval_loss": 0.5169993042945862, |
|
"eval_runtime": 2729.0002, |
|
"eval_samples_per_second": 6.198, |
|
"eval_steps_per_second": 0.388, |
|
"eval_wer": 0.3576893497926726, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 0.00013956230031948882, |
|
"loss": 0.7687, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 0.0001371661341853035, |
|
"loss": 0.7884, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 0.00013476996805111819, |
|
"loss": 0.8156, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 0.0001323738019169329, |
|
"loss": 0.8198, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_cer": 0.1431662427967562, |
|
"eval_loss": 0.5838645696640015, |
|
"eval_runtime": 2730.4526, |
|
"eval_samples_per_second": 6.195, |
|
"eval_steps_per_second": 0.387, |
|
"eval_wer": 0.38254485487080686, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 0.00012997763578274758, |
|
"loss": 0.819, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 0.00012758626198083067, |
|
"loss": 0.8411, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 0.00012519009584664536, |
|
"loss": 0.8366, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 0.00012279392971246007, |
|
"loss": 0.8008, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_cer": 0.13762394377870937, |
|
"eval_loss": 0.5447062253952026, |
|
"eval_runtime": 2738.2931, |
|
"eval_samples_per_second": 6.177, |
|
"eval_steps_per_second": 0.386, |
|
"eval_wer": 0.36609344401366856, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.00012039776357827474, |
|
"loss": 0.8032, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"learning_rate": 0.00011800159744408944, |
|
"loss": 0.7753, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 0.00011560543130990414, |
|
"loss": 0.7608, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 0.00011321405750798721, |
|
"loss": 0.759, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"eval_cer": 0.1336804268071908, |
|
"eval_loss": 0.49982598423957825, |
|
"eval_runtime": 2725.5181, |
|
"eval_samples_per_second": 6.206, |
|
"eval_steps_per_second": 0.388, |
|
"eval_wer": 0.3533921619928803, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 0.00011081789137380191, |
|
"loss": 0.7285, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 0.00010842172523961661, |
|
"loss": 0.7036, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.00010602555910543131, |
|
"loss": 0.6953, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 0.00010363418530351436, |
|
"loss": 0.6907, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_cer": 0.12877502322923437, |
|
"eval_loss": 0.47100237011909485, |
|
"eval_runtime": 2667.1801, |
|
"eval_samples_per_second": 6.342, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.34119829698166165, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 20.04, |
|
"learning_rate": 0.00010123801916932906, |
|
"loss": 0.6858, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 20.28, |
|
"learning_rate": 9.884664536741213e-05, |
|
"loss": 0.6603, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"learning_rate": 9.645047923322683e-05, |
|
"loss": 0.6609, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 9.405431309904153e-05, |
|
"loss": 0.659, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_cer": 0.12423387149543921, |
|
"eval_loss": 0.4578304886817932, |
|
"eval_runtime": 2665.8908, |
|
"eval_samples_per_second": 6.345, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.3324532819573611, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"learning_rate": 9.165814696485623e-05, |
|
"loss": 0.6567, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 8.926198083067093e-05, |
|
"loss": 0.6437, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 21.46, |
|
"learning_rate": 8.686581469648561e-05, |
|
"loss": 0.6371, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"learning_rate": 8.447444089456868e-05, |
|
"loss": 0.6345, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"eval_cer": 0.12205708502554125, |
|
"eval_loss": 0.45305466651916504, |
|
"eval_runtime": 2667.1819, |
|
"eval_samples_per_second": 6.342, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.3256982930174662, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 21.93, |
|
"learning_rate": 8.207827476038337e-05, |
|
"loss": 0.6418, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"learning_rate": 7.968210862619807e-05, |
|
"loss": 0.6306, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 7.728594249201278e-05, |
|
"loss": 0.6213, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 7.489456869009583e-05, |
|
"loss": 0.6242, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"eval_cer": 0.12094251800538308, |
|
"eval_loss": 0.4497627019882202, |
|
"eval_runtime": 2727.8154, |
|
"eval_samples_per_second": 6.201, |
|
"eval_steps_per_second": 0.388, |
|
"eval_wer": 0.32180545314717474, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 22.87, |
|
"learning_rate": 7.249840255591053e-05, |
|
"loss": 0.6294, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"learning_rate": 7.010223642172524e-05, |
|
"loss": 0.6141, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 23.34, |
|
"learning_rate": 6.770607028753993e-05, |
|
"loss": 0.6155, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 6.530990415335462e-05, |
|
"loss": 0.6163, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_cer": 0.11941571386818009, |
|
"eval_loss": 0.45521289110183716, |
|
"eval_runtime": 2664.8843, |
|
"eval_samples_per_second": 6.347, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.3188402349975026, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 23.82, |
|
"learning_rate": 6.291373801916932e-05, |
|
"loss": 0.6167, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 24.05, |
|
"learning_rate": 6.0522364217252394e-05, |
|
"loss": 0.6179, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 5.8126198083067085e-05, |
|
"loss": 0.6154, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 5.573482428115016e-05, |
|
"loss": 0.6121, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"eval_cer": 0.1153500525656853, |
|
"eval_loss": 0.46334853768348694, |
|
"eval_runtime": 2666.3375, |
|
"eval_samples_per_second": 6.344, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.3136947093848362, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"learning_rate": 5.3338658146964855e-05, |
|
"loss": 0.6227, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 5.0942492012779546e-05, |
|
"loss": 0.6156, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 4.854632587859424e-05, |
|
"loss": 0.6159, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 4.615015974440894e-05, |
|
"loss": 0.6054, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"eval_cer": 0.11759009234983882, |
|
"eval_loss": 0.46227386593818665, |
|
"eval_runtime": 2666.9277, |
|
"eval_samples_per_second": 6.342, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.3171356309809798, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 25.7, |
|
"learning_rate": 4.375399361022364e-05, |
|
"loss": 0.6051, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 4.1362619808306704e-05, |
|
"loss": 0.5986, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 3.89664536741214e-05, |
|
"loss": 0.5916, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 3.65702875399361e-05, |
|
"loss": 0.591, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"eval_cer": 0.11455829556310718, |
|
"eval_loss": 0.4413212835788727, |
|
"eval_runtime": 2669.4551, |
|
"eval_samples_per_second": 6.336, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 0.31158575743881267, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 3.41741214057508e-05, |
|
"loss": 0.5904, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 3.178274760383386e-05, |
|
"loss": 0.5887, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 2.9386581469648557e-05, |
|
"loss": 0.5768, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"learning_rate": 2.6990415335463258e-05, |
|
"loss": 0.5713, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"eval_cer": 0.11345245313801873, |
|
"eval_loss": 0.4338010549545288, |
|
"eval_runtime": 2668.5066, |
|
"eval_samples_per_second": 6.338, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 0.3092706673326515, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"learning_rate": 2.4594249201277952e-05, |
|
"loss": 0.5653, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 2.219808306709265e-05, |
|
"loss": 0.569, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 28.06, |
|
"learning_rate": 1.980670926517572e-05, |
|
"loss": 0.5748, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 1.7410543130990413e-05, |
|
"loss": 0.5703, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"eval_cer": 0.11209795975344294, |
|
"eval_loss": 0.42797738313674927, |
|
"eval_runtime": 2667.5384, |
|
"eval_samples_per_second": 6.341, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.30612309619516526, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"learning_rate": 1.501437699680511e-05, |
|
"loss": 0.5606, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 28.77, |
|
"learning_rate": 1.2623003194888177e-05, |
|
"loss": 0.5647, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 1.0226837060702875e-05, |
|
"loss": 0.5567, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"learning_rate": 7.830670926517571e-06, |
|
"loss": 0.5576, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"eval_cer": 0.11193437359588548, |
|
"eval_loss": 0.42482054233551025, |
|
"eval_runtime": 2665.5981, |
|
"eval_samples_per_second": 6.345, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 0.30466427229265275, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"learning_rate": 5.434504792332268e-06, |
|
"loss": 0.5596, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 29.71, |
|
"learning_rate": 3.0431309904153355e-06, |
|
"loss": 0.5567, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"learning_rate": 6.469648562300319e-07, |
|
"loss": 0.5581, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"step": 63600, |
|
"total_flos": 4.271260053717039e+20, |
|
"train_loss": 0.9622837933354408, |
|
"train_runtime": 156348.4692, |
|
"train_samples_per_second": 26.04, |
|
"train_steps_per_second": 0.407 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 63600, |
|
"num_train_epochs": 30, |
|
"save_steps": 2000, |
|
"total_flos": 4.271260053717039e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|