|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 13.465952563121652, |
|
"eval_steps": 2000, |
|
"global_step": 22000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.9999999999999996e-05, |
|
"loss": 33.0181, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.999999999999999e-05, |
|
"loss": 8.7646, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 4.6933, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015999999999999999, |
|
"loss": 4.5796, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019999999999999998, |
|
"loss": 4.1047, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 2.6327, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00028, |
|
"loss": 1.9893, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00029869536855838224, |
|
"loss": 1.7241, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00029608610567514673, |
|
"loss": 1.5607, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00029347684279191127, |
|
"loss": 1.4503, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_cer": 0.268688843618788, |
|
"eval_loss": 1.0610458850860596, |
|
"eval_runtime": 1586.9419, |
|
"eval_samples_per_second": 8.25, |
|
"eval_steps_per_second": 0.516, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00029086757990867576, |
|
"loss": 1.3782, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002882583170254403, |
|
"loss": 1.3244, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0002856490541422048, |
|
"loss": 1.2553, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00028303979125896933, |
|
"loss": 1.2077, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0002804305283757338, |
|
"loss": 1.1868, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00027782126549249836, |
|
"loss": 1.1624, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00027521200260926284, |
|
"loss": 1.1136, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0002726027397260274, |
|
"loss": 1.077, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00026999347684279187, |
|
"loss": 1.053, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0002673842139595564, |
|
"loss": 1.0239, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_cer": 0.19039349567825709, |
|
"eval_loss": 0.6961866617202759, |
|
"eval_runtime": 1607.479, |
|
"eval_samples_per_second": 8.145, |
|
"eval_steps_per_second": 0.509, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0002647749510763209, |
|
"loss": 1.0206, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00026216568819308544, |
|
"loss": 1.0045, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0002595564253098499, |
|
"loss": 0.9802, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00025694716242661447, |
|
"loss": 0.9839, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00025433789954337895, |
|
"loss": 0.9243, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0002517286366601435, |
|
"loss": 0.9082, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.000249119373776908, |
|
"loss": 0.9017, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.0002465101108936725, |
|
"loss": 0.8871, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.00024390084801043704, |
|
"loss": 0.9036, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00024129158512720155, |
|
"loss": 0.8977, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_cer": 0.16872254319465907, |
|
"eval_loss": 0.594495415687561, |
|
"eval_runtime": 1591.6049, |
|
"eval_samples_per_second": 8.226, |
|
"eval_steps_per_second": 0.515, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.00023868232224396607, |
|
"loss": 0.8843, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00023607305936073058, |
|
"loss": 0.8757, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0002334637964774951, |
|
"loss": 0.8573, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0002308545335942596, |
|
"loss": 0.8126, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00022824527071102412, |
|
"loss": 0.8192, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00022563600782778863, |
|
"loss": 0.8061, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.00022302674494455315, |
|
"loss": 0.8123, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.00022041748206131766, |
|
"loss": 0.8046, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.00021780821917808218, |
|
"loss": 0.7979, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0002151989562948467, |
|
"loss": 0.804, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_cer": 0.14924661713942214, |
|
"eval_loss": 0.5327703952789307, |
|
"eval_runtime": 1595.6324, |
|
"eval_samples_per_second": 8.206, |
|
"eval_steps_per_second": 0.513, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0002125896934116112, |
|
"loss": 0.7867, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00020998043052837572, |
|
"loss": 0.7557, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00020737116764514023, |
|
"loss": 0.7478, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00020476190476190475, |
|
"loss": 0.7398, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.00020215264187866926, |
|
"loss": 0.7408, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00019954337899543377, |
|
"loss": 0.75, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0001969341161121983, |
|
"loss": 0.7344, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.0001943248532289628, |
|
"loss": 0.738, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00019171559034572732, |
|
"loss": 0.7373, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00018910632746249183, |
|
"loss": 0.698, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_cer": 0.13653489424101573, |
|
"eval_loss": 0.5013594031333923, |
|
"eval_runtime": 1605.1161, |
|
"eval_samples_per_second": 8.157, |
|
"eval_steps_per_second": 0.51, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00018649706457925634, |
|
"loss": 0.6943, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00018388780169602086, |
|
"loss": 0.6997, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00018127853881278537, |
|
"loss": 0.6929, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00017866927592954989, |
|
"loss": 0.7003, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0001760600130463144, |
|
"loss": 0.6863, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00017345075016307891, |
|
"loss": 0.6883, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00017084148727984343, |
|
"loss": 0.6787, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00016823222439660794, |
|
"loss": 0.6518, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00016562296151337246, |
|
"loss": 0.6494, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00016301369863013697, |
|
"loss": 0.6426, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_cer": 0.13216305737125092, |
|
"eval_loss": 0.47150149941444397, |
|
"eval_runtime": 1597.7342, |
|
"eval_samples_per_second": 8.195, |
|
"eval_steps_per_second": 0.513, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.00016040443574690148, |
|
"loss": 0.6457, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.000157795172863666, |
|
"loss": 0.6429, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.0001551859099804305, |
|
"loss": 0.6512, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00015257664709719503, |
|
"loss": 0.6458, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00014996738421395954, |
|
"loss": 0.6501, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 0.00014735812133072405, |
|
"loss": 0.6095, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.00014474885844748857, |
|
"loss": 0.6132, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.00014213959556425308, |
|
"loss": 0.6102, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.0001395303326810176, |
|
"loss": 0.6125, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0001369210697977821, |
|
"loss": 0.61, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_cer": 0.1257692459492199, |
|
"eval_loss": 0.45295360684394836, |
|
"eval_runtime": 1603.0798, |
|
"eval_samples_per_second": 8.167, |
|
"eval_steps_per_second": 0.511, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.00013431180691454662, |
|
"loss": 0.606, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.00013170254403131114, |
|
"loss": 0.5957, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 0.00012909328114807565, |
|
"loss": 0.5992, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 0.00012648401826484017, |
|
"loss": 0.5752, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.00012387475538160468, |
|
"loss": 0.5654, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.00012126549249836919, |
|
"loss": 0.5725, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.00011865622961513371, |
|
"loss": 0.5713, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 0.00011604696673189822, |
|
"loss": 0.5649, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.00011343770384866273, |
|
"loss": 0.5643, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.00011082844096542725, |
|
"loss": 0.5709, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"eval_cer": 0.1200554980402634, |
|
"eval_loss": 0.4299587607383728, |
|
"eval_runtime": 1609.5227, |
|
"eval_samples_per_second": 8.135, |
|
"eval_steps_per_second": 0.509, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.00010821917808219176, |
|
"loss": 0.5666, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 0.00010560991519895628, |
|
"loss": 0.5531, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.00010300065231572079, |
|
"loss": 0.5389, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 0.0001003913894324853, |
|
"loss": 0.5456, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 9.778212654924982e-05, |
|
"loss": 0.5353, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 9.517286366601433e-05, |
|
"loss": 0.5337, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 9.256360078277885e-05, |
|
"loss": 0.5296, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 8.995433789954336e-05, |
|
"loss": 0.5372, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 8.734507501630787e-05, |
|
"loss": 0.5388, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 8.473581213307239e-05, |
|
"loss": 0.5235, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_cer": 0.11664607248141211, |
|
"eval_loss": 0.4167773723602295, |
|
"eval_runtime": 1608.2913, |
|
"eval_samples_per_second": 8.141, |
|
"eval_steps_per_second": 0.509, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 8.212654924983692e-05, |
|
"loss": 0.509, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 7.951728636660143e-05, |
|
"loss": 0.5116, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 7.690802348336594e-05, |
|
"loss": 0.4967, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 7.429876060013046e-05, |
|
"loss": 0.511, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 7.168949771689497e-05, |
|
"loss": 0.5056, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 6.908023483365949e-05, |
|
"loss": 0.5073, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 6.6470971950424e-05, |
|
"loss": 0.4968, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 6.386170906718851e-05, |
|
"loss": 0.5009, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 6.125244618395303e-05, |
|
"loss": 0.4832, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 5.864318330071754e-05, |
|
"loss": 0.4778, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"eval_cer": 0.11294822712906938, |
|
"eval_loss": 0.40570223331451416, |
|
"eval_runtime": 1612.661, |
|
"eval_samples_per_second": 8.119, |
|
"eval_steps_per_second": 0.508, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 5.6033920417482055e-05, |
|
"loss": 0.4775, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 5.342465753424657e-05, |
|
"loss": 0.4855, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 5.081539465101108e-05, |
|
"loss": 0.4773, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 4.82061317677756e-05, |
|
"loss": 0.4745, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 4.559686888454011e-05, |
|
"loss": 0.48, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 4.2987606001304625e-05, |
|
"loss": 0.463, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 4.037834311806914e-05, |
|
"loss": 0.4643, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 3.776908023483365e-05, |
|
"loss": 0.449, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 3.515981735159817e-05, |
|
"loss": 0.4604, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3.255055446836268e-05, |
|
"loss": 0.4571, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"eval_cer": 0.10995473327241098, |
|
"eval_loss": 0.3945465385913849, |
|
"eval_runtime": 1610.0901, |
|
"eval_samples_per_second": 8.132, |
|
"eval_steps_per_second": 0.509, |
|
"step": 22000 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 24495, |
|
"num_train_epochs": 15, |
|
"save_steps": 2000, |
|
"total_flos": 1.5466215358461693e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|