{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 300.0, |
|
"eval_steps": 150, |
|
"global_step": 6900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0003, |
|
"loss": 35.2887, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00029781021897810217, |
|
"loss": 5.9569, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00029562043795620436, |
|
"loss": 4.9138, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_cer": 1.0, |
|
"eval_loss": 4.7965407371521, |
|
"eval_runtime": 1.256, |
|
"eval_samples_per_second": 35.828, |
|
"eval_steps_per_second": 2.389, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.00029343065693430656, |
|
"loss": 4.887, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.00029124087591240875, |
|
"loss": 4.8447, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.00028905109489051094, |
|
"loss": 4.7484, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_cer": 1.0, |
|
"eval_loss": 4.608075141906738, |
|
"eval_runtime": 1.2451, |
|
"eval_samples_per_second": 36.142, |
|
"eval_steps_per_second": 2.409, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 0.00028686131386861314, |
|
"loss": 4.6529, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.0002846715328467153, |
|
"loss": 4.6373, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.00028248175182481747, |
|
"loss": 4.5894, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_cer": 0.9851301115241635, |
|
"eval_loss": 4.469708442687988, |
|
"eval_runtime": 1.2325, |
|
"eval_samples_per_second": 36.51, |
|
"eval_steps_per_second": 2.434, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 0.00028029197080291966, |
|
"loss": 4.5045, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 0.00027810218978102186, |
|
"loss": 4.4076, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 0.00027591240875912405, |
|
"loss": 4.2024, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"eval_cer": 0.9076827757125155, |
|
"eval_loss": 4.037315845489502, |
|
"eval_runtime": 1.2357, |
|
"eval_samples_per_second": 36.417, |
|
"eval_steps_per_second": 2.428, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 28.26, |
|
"learning_rate": 0.00027372262773722625, |
|
"loss": 3.8743, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 0.00027153284671532844, |
|
"loss": 3.3488, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"learning_rate": 0.00026934306569343063, |
|
"loss": 2.7314, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"eval_cer": 0.5340768277571252, |
|
"eval_loss": 2.5507473945617676, |
|
"eval_runtime": 1.2278, |
|
"eval_samples_per_second": 36.651, |
|
"eval_steps_per_second": 2.443, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 0.00026715328467153283, |
|
"loss": 2.1968, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 0.000264963503649635, |
|
"loss": 1.6522, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 0.0002627737226277372, |
|
"loss": 1.2293, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"eval_cer": 0.4138785625774473, |
|
"eval_loss": 2.01461124420166, |
|
"eval_runtime": 1.2246, |
|
"eval_samples_per_second": 36.746, |
|
"eval_steps_per_second": 2.45, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"learning_rate": 0.0002605839416058394, |
|
"loss": 0.9292, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"learning_rate": 0.00025839416058394155, |
|
"loss": 0.7208, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"learning_rate": 0.00025620437956204374, |
|
"loss": 0.5544, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"eval_cer": 0.355638166047088, |
|
"eval_loss": 1.9821244478225708, |
|
"eval_runtime": 1.2073, |
|
"eval_samples_per_second": 37.275, |
|
"eval_steps_per_second": 2.485, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 0.00025401459854014594, |
|
"loss": 0.4757, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.00025182481751824813, |
|
"loss": 0.3895, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"learning_rate": 0.0002496350364963503, |
|
"loss": 0.3224, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"eval_cer": 0.3587360594795539, |
|
"eval_loss": 2.0189881324768066, |
|
"eval_runtime": 1.1983, |
|
"eval_samples_per_second": 37.554, |
|
"eval_steps_per_second": 2.504, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 54.35, |
|
"learning_rate": 0.0002474452554744525, |
|
"loss": 0.279, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 56.52, |
|
"learning_rate": 0.0002452554744525547, |
|
"loss": 0.2285, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 58.7, |
|
"learning_rate": 0.0002430656934306569, |
|
"loss": 0.1951, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 58.7, |
|
"eval_cer": 0.36121437422552666, |
|
"eval_loss": 2.1229116916656494, |
|
"eval_runtime": 1.2603, |
|
"eval_samples_per_second": 35.706, |
|
"eval_steps_per_second": 2.38, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 60.87, |
|
"learning_rate": 0.0002408759124087591, |
|
"loss": 0.1964, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 63.04, |
|
"learning_rate": 0.0002386861313868613, |
|
"loss": 0.1622, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"learning_rate": 0.0002364963503649635, |
|
"loss": 0.1539, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"eval_cer": 0.3469640644361834, |
|
"eval_loss": 2.111368179321289, |
|
"eval_runtime": 1.2194, |
|
"eval_samples_per_second": 36.903, |
|
"eval_steps_per_second": 2.46, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 67.39, |
|
"learning_rate": 0.00023430656934306568, |
|
"loss": 0.1492, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 69.57, |
|
"learning_rate": 0.00023211678832116788, |
|
"loss": 0.1404, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 71.74, |
|
"learning_rate": 0.00022992700729927004, |
|
"loss": 0.1165, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 71.74, |
|
"eval_cer": 0.33147459727385375, |
|
"eval_loss": 2.274796485900879, |
|
"eval_runtime": 1.1874, |
|
"eval_samples_per_second": 37.898, |
|
"eval_steps_per_second": 2.527, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 73.91, |
|
"learning_rate": 0.00022773722627737224, |
|
"loss": 0.1268, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 76.09, |
|
"learning_rate": 0.00022554744525547443, |
|
"loss": 0.1186, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 78.26, |
|
"learning_rate": 0.00022335766423357663, |
|
"loss": 0.1119, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 78.26, |
|
"eval_cer": 0.34882280049566294, |
|
"eval_loss": 2.2390518188476562, |
|
"eval_runtime": 1.3465, |
|
"eval_samples_per_second": 33.42, |
|
"eval_steps_per_second": 2.228, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 80.43, |
|
"learning_rate": 0.00022116788321167882, |
|
"loss": 0.0988, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 82.61, |
|
"learning_rate": 0.00021897810218978101, |
|
"loss": 0.112, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 84.78, |
|
"learning_rate": 0.0002167883211678832, |
|
"loss": 0.0989, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 84.78, |
|
"eval_cer": 0.3382899628252788, |
|
"eval_loss": 2.343754529953003, |
|
"eval_runtime": 1.2055, |
|
"eval_samples_per_second": 37.329, |
|
"eval_steps_per_second": 2.489, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"learning_rate": 0.00021459854014598537, |
|
"loss": 0.097, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 89.13, |
|
"learning_rate": 0.00021240875912408757, |
|
"loss": 0.0854, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"learning_rate": 0.00021021897810218976, |
|
"loss": 0.0915, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"eval_cer": 0.3587360594795539, |
|
"eval_loss": 2.121840000152588, |
|
"eval_runtime": 1.2037, |
|
"eval_samples_per_second": 37.386, |
|
"eval_steps_per_second": 2.492, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 93.48, |
|
"learning_rate": 0.00020802919708029196, |
|
"loss": 0.078, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 95.65, |
|
"learning_rate": 0.00020583941605839415, |
|
"loss": 0.0857, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 97.83, |
|
"learning_rate": 0.00020364963503649632, |
|
"loss": 0.0721, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 97.83, |
|
"eval_cer": 0.35192069392812886, |
|
"eval_loss": 2.242812395095825, |
|
"eval_runtime": 1.1964, |
|
"eval_samples_per_second": 37.614, |
|
"eval_steps_per_second": 2.508, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.0002014598540145985, |
|
"loss": 0.0799, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 102.17, |
|
"learning_rate": 0.0001992700729927007, |
|
"loss": 0.0798, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 104.35, |
|
"learning_rate": 0.0001970802919708029, |
|
"loss": 0.0742, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 104.35, |
|
"eval_cer": 0.33643122676579923, |
|
"eval_loss": 2.229339838027954, |
|
"eval_runtime": 1.2156, |
|
"eval_samples_per_second": 37.019, |
|
"eval_steps_per_second": 2.468, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 106.52, |
|
"learning_rate": 0.0001948905109489051, |
|
"loss": 0.0692, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 108.7, |
|
"learning_rate": 0.0001927007299270073, |
|
"loss": 0.0664, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 110.87, |
|
"learning_rate": 0.00019051094890510948, |
|
"loss": 0.0629, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 110.87, |
|
"eval_cer": 0.33705080545229243, |
|
"eval_loss": 2.2878150939941406, |
|
"eval_runtime": 1.2044, |
|
"eval_samples_per_second": 37.364, |
|
"eval_steps_per_second": 2.491, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 113.04, |
|
"learning_rate": 0.00018832116788321167, |
|
"loss": 0.0619, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 115.22, |
|
"learning_rate": 0.00018613138686131387, |
|
"loss": 0.0582, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 117.39, |
|
"learning_rate": 0.00018394160583941606, |
|
"loss": 0.0495, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 117.39, |
|
"eval_cer": 0.34076827757125155, |
|
"eval_loss": 2.2671637535095215, |
|
"eval_runtime": 1.2039, |
|
"eval_samples_per_second": 37.379, |
|
"eval_steps_per_second": 2.492, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 119.57, |
|
"learning_rate": 0.00018175182481751826, |
|
"loss": 0.0614, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 121.74, |
|
"learning_rate": 0.00017956204379562042, |
|
"loss": 0.0565, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 123.91, |
|
"learning_rate": 0.00017737226277372262, |
|
"loss": 0.0466, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 123.91, |
|
"eval_cer": 0.35254027261462206, |
|
"eval_loss": 2.2532107830047607, |
|
"eval_runtime": 1.3563, |
|
"eval_samples_per_second": 33.179, |
|
"eval_steps_per_second": 2.212, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 126.09, |
|
"learning_rate": 0.00017518248175182478, |
|
"loss": 0.0465, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 128.26, |
|
"learning_rate": 0.00017299270072992698, |
|
"loss": 0.0496, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"learning_rate": 0.00017080291970802917, |
|
"loss": 0.0424, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"eval_cer": 0.32589838909541513, |
|
"eval_loss": 2.2844393253326416, |
|
"eval_runtime": 1.2006, |
|
"eval_samples_per_second": 37.48, |
|
"eval_steps_per_second": 2.499, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 132.61, |
|
"learning_rate": 0.00016861313868613137, |
|
"loss": 0.0483, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 134.78, |
|
"learning_rate": 0.00016642335766423356, |
|
"loss": 0.0488, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 136.96, |
|
"learning_rate": 0.00016423357664233575, |
|
"loss": 0.0446, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 136.96, |
|
"eval_cer": 0.3252788104089219, |
|
"eval_loss": 2.2763445377349854, |
|
"eval_runtime": 1.2043, |
|
"eval_samples_per_second": 37.368, |
|
"eval_steps_per_second": 2.491, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 139.13, |
|
"learning_rate": 0.00016204379562043795, |
|
"loss": 0.0424, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 141.3, |
|
"learning_rate": 0.00015985401459854014, |
|
"loss": 0.0429, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"learning_rate": 0.00015766423357664234, |
|
"loss": 0.0411, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"eval_cer": 0.3302354399008674, |
|
"eval_loss": 2.301079034805298, |
|
"eval_runtime": 1.345, |
|
"eval_samples_per_second": 33.458, |
|
"eval_steps_per_second": 2.231, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 145.65, |
|
"learning_rate": 0.00015547445255474453, |
|
"loss": 0.0392, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 147.83, |
|
"learning_rate": 0.00015328467153284672, |
|
"loss": 0.0426, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 0.00015109489051094892, |
|
"loss": 0.0419, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_cer": 0.3420074349442379, |
|
"eval_loss": 2.320059299468994, |
|
"eval_runtime": 1.2411, |
|
"eval_samples_per_second": 36.259, |
|
"eval_steps_per_second": 2.417, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 152.17, |
|
"learning_rate": 0.00014890510948905108, |
|
"loss": 0.0386, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 154.35, |
|
"learning_rate": 0.00014671532846715328, |
|
"loss": 0.0402, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 156.52, |
|
"learning_rate": 0.00014452554744525547, |
|
"loss": 0.0333, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 156.52, |
|
"eval_cer": 0.34386617100371747, |
|
"eval_loss": 2.364445209503174, |
|
"eval_runtime": 1.2337, |
|
"eval_samples_per_second": 36.475, |
|
"eval_steps_per_second": 2.432, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 158.7, |
|
"learning_rate": 0.00014233576642335764, |
|
"loss": 0.0434, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 160.87, |
|
"learning_rate": 0.00014014598540145983, |
|
"loss": 0.0393, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 163.04, |
|
"learning_rate": 0.00013795620437956203, |
|
"loss": 0.0384, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 163.04, |
|
"eval_cer": 0.35315985130111527, |
|
"eval_loss": 2.3685200214385986, |
|
"eval_runtime": 1.2136, |
|
"eval_samples_per_second": 37.081, |
|
"eval_steps_per_second": 2.472, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 165.22, |
|
"learning_rate": 0.00013576642335766422, |
|
"loss": 0.0324, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 167.39, |
|
"learning_rate": 0.00013357664233576641, |
|
"loss": 0.0438, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 169.57, |
|
"learning_rate": 0.0001313868613138686, |
|
"loss": 0.0367, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 169.57, |
|
"eval_cer": 0.3469640644361834, |
|
"eval_loss": 2.397036552429199, |
|
"eval_runtime": 1.2259, |
|
"eval_samples_per_second": 36.708, |
|
"eval_steps_per_second": 2.447, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 171.74, |
|
"learning_rate": 0.00012919708029197077, |
|
"loss": 0.0336, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 173.91, |
|
"learning_rate": 0.00012700729927007297, |
|
"loss": 0.037, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 176.09, |
|
"learning_rate": 0.00012481751824817516, |
|
"loss": 0.0307, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 176.09, |
|
"eval_cer": 0.3308550185873606, |
|
"eval_loss": 2.3530125617980957, |
|
"eval_runtime": 1.2484, |
|
"eval_samples_per_second": 36.047, |
|
"eval_steps_per_second": 2.403, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 178.26, |
|
"learning_rate": 0.00012262773722627736, |
|
"loss": 0.0284, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 180.43, |
|
"learning_rate": 0.00012043795620437955, |
|
"loss": 0.0233, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"learning_rate": 0.00011824817518248174, |
|
"loss": 0.0328, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"eval_cer": 0.33147459727385375, |
|
"eval_loss": 2.3414556980133057, |
|
"eval_runtime": 1.2281, |
|
"eval_samples_per_second": 36.64, |
|
"eval_steps_per_second": 2.443, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 184.78, |
|
"learning_rate": 0.00011605839416058394, |
|
"loss": 0.0285, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 186.96, |
|
"learning_rate": 0.00011386861313868612, |
|
"loss": 0.0222, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 189.13, |
|
"learning_rate": 0.00011167883211678831, |
|
"loss": 0.0271, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 189.13, |
|
"eval_cer": 0.3308550185873606, |
|
"eval_loss": 2.4165024757385254, |
|
"eval_runtime": 1.1891, |
|
"eval_samples_per_second": 37.844, |
|
"eval_steps_per_second": 2.523, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 191.3, |
|
"learning_rate": 0.00010948905109489051, |
|
"loss": 0.0307, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 193.48, |
|
"learning_rate": 0.00010729927007299269, |
|
"loss": 0.023, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 195.65, |
|
"learning_rate": 0.00010510948905109488, |
|
"loss": 0.0213, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 195.65, |
|
"eval_cer": 0.3451053283767038, |
|
"eval_loss": 2.447828769683838, |
|
"eval_runtime": 1.1406, |
|
"eval_samples_per_second": 39.452, |
|
"eval_steps_per_second": 2.63, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 197.83, |
|
"learning_rate": 0.00010291970802919708, |
|
"loss": 0.021, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 0.00010072992700729926, |
|
"loss": 0.0246, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 202.17, |
|
"learning_rate": 9.854014598540145e-05, |
|
"loss": 0.0193, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 202.17, |
|
"eval_cer": 0.355638166047088, |
|
"eval_loss": 2.524061918258667, |
|
"eval_runtime": 1.203, |
|
"eval_samples_per_second": 37.406, |
|
"eval_steps_per_second": 2.494, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 204.35, |
|
"learning_rate": 9.635036496350364e-05, |
|
"loss": 0.0223, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 206.52, |
|
"learning_rate": 9.416058394160584e-05, |
|
"loss": 0.0223, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 208.7, |
|
"learning_rate": 9.197080291970803e-05, |
|
"loss": 0.0204, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 208.7, |
|
"eval_cer": 0.34634448574969023, |
|
"eval_loss": 2.570009708404541, |
|
"eval_runtime": 1.2664, |
|
"eval_samples_per_second": 35.533, |
|
"eval_steps_per_second": 2.369, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 210.87, |
|
"learning_rate": 8.978102189781021e-05, |
|
"loss": 0.0202, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 213.04, |
|
"learning_rate": 8.759124087591239e-05, |
|
"loss": 0.0193, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 215.22, |
|
"learning_rate": 8.540145985401459e-05, |
|
"loss": 0.0185, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 215.22, |
|
"eval_cer": 0.31784386617100374, |
|
"eval_loss": 2.583724021911621, |
|
"eval_runtime": 1.2549, |
|
"eval_samples_per_second": 35.859, |
|
"eval_steps_per_second": 2.391, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 217.39, |
|
"learning_rate": 8.321167883211678e-05, |
|
"loss": 0.0191, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 219.57, |
|
"learning_rate": 8.102189781021897e-05, |
|
"loss": 0.0169, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 221.74, |
|
"learning_rate": 7.883211678832117e-05, |
|
"loss": 0.0161, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 221.74, |
|
"eval_cer": 0.33767038413878564, |
|
"eval_loss": 2.513859987258911, |
|
"eval_runtime": 1.2515, |
|
"eval_samples_per_second": 35.958, |
|
"eval_steps_per_second": 2.397, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 223.91, |
|
"learning_rate": 7.664233576642336e-05, |
|
"loss": 0.0183, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 226.09, |
|
"learning_rate": 7.445255474452554e-05, |
|
"loss": 0.0228, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 228.26, |
|
"learning_rate": 7.226277372262774e-05, |
|
"loss": 0.0167, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 228.26, |
|
"eval_cer": 0.3351920693928129, |
|
"eval_loss": 2.5287766456604004, |
|
"eval_runtime": 1.2044, |
|
"eval_samples_per_second": 37.363, |
|
"eval_steps_per_second": 2.491, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 230.43, |
|
"learning_rate": 7.007299270072992e-05, |
|
"loss": 0.0181, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 232.61, |
|
"learning_rate": 6.788321167883211e-05, |
|
"loss": 0.0144, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 234.78, |
|
"learning_rate": 6.56934306569343e-05, |
|
"loss": 0.0148, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 234.78, |
|
"eval_cer": 0.338909541511772, |
|
"eval_loss": 2.574066400527954, |
|
"eval_runtime": 1.2534, |
|
"eval_samples_per_second": 35.904, |
|
"eval_steps_per_second": 2.394, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 236.96, |
|
"learning_rate": 6.350364963503648e-05, |
|
"loss": 0.0143, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 239.13, |
|
"learning_rate": 6.131386861313868e-05, |
|
"loss": 0.0197, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 241.3, |
|
"learning_rate": 5.912408759124087e-05, |
|
"loss": 0.0141, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 241.3, |
|
"eval_cer": 0.338909541511772, |
|
"eval_loss": 2.5173895359039307, |
|
"eval_runtime": 1.1989, |
|
"eval_samples_per_second": 37.536, |
|
"eval_steps_per_second": 2.502, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 243.48, |
|
"learning_rate": 5.693430656934306e-05, |
|
"loss": 0.0165, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 245.65, |
|
"learning_rate": 5.4744525547445253e-05, |
|
"loss": 0.0127, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 247.83, |
|
"learning_rate": 5.255474452554744e-05, |
|
"loss": 0.0122, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 247.83, |
|
"eval_cer": 0.3351920693928129, |
|
"eval_loss": 2.5573315620422363, |
|
"eval_runtime": 1.2363, |
|
"eval_samples_per_second": 36.4, |
|
"eval_steps_per_second": 2.427, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 5.036496350364963e-05, |
|
"loss": 0.0135, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 252.17, |
|
"learning_rate": 4.817518248175182e-05, |
|
"loss": 0.0116, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 254.35, |
|
"learning_rate": 4.5985401459854016e-05, |
|
"loss": 0.0115, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 254.35, |
|
"eval_cer": 0.32961586121437425, |
|
"eval_loss": 2.579023838043213, |
|
"eval_runtime": 1.2327, |
|
"eval_samples_per_second": 36.506, |
|
"eval_steps_per_second": 2.434, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 256.52, |
|
"learning_rate": 4.3795620437956196e-05, |
|
"loss": 0.0141, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 258.7, |
|
"learning_rate": 4.160583941605839e-05, |
|
"loss": 0.0143, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 260.87, |
|
"learning_rate": 3.9416058394160584e-05, |
|
"loss": 0.0141, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 260.87, |
|
"eval_cer": 0.32032218091697645, |
|
"eval_loss": 2.577375888824463, |
|
"eval_runtime": 1.2321, |
|
"eval_samples_per_second": 36.524, |
|
"eval_steps_per_second": 2.435, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 263.04, |
|
"learning_rate": 3.722627737226277e-05, |
|
"loss": 0.0116, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 265.22, |
|
"learning_rate": 3.503649635036496e-05, |
|
"loss": 0.0101, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 267.39, |
|
"learning_rate": 3.284671532846715e-05, |
|
"loss": 0.0123, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 267.39, |
|
"eval_cer": 0.3308550185873606, |
|
"eval_loss": 2.614670753479004, |
|
"eval_runtime": 1.1319, |
|
"eval_samples_per_second": 39.755, |
|
"eval_steps_per_second": 2.65, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 269.57, |
|
"learning_rate": 3.065693430656934e-05, |
|
"loss": 0.0151, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 271.74, |
|
"learning_rate": 2.846715328467153e-05, |
|
"loss": 0.0099, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 273.91, |
|
"learning_rate": 2.627737226277372e-05, |
|
"loss": 0.0214, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 273.91, |
|
"eval_cer": 0.3302354399008674, |
|
"eval_loss": 2.620166778564453, |
|
"eval_runtime": 1.262, |
|
"eval_samples_per_second": 35.657, |
|
"eval_steps_per_second": 2.377, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 276.09, |
|
"learning_rate": 2.408759124087591e-05, |
|
"loss": 0.0085, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 278.26, |
|
"learning_rate": 2.1897810218978098e-05, |
|
"loss": 0.0119, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 280.43, |
|
"learning_rate": 1.9708029197080292e-05, |
|
"loss": 0.0107, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 280.43, |
|
"eval_cer": 0.32342007434944237, |
|
"eval_loss": 2.6263809204101562, |
|
"eval_runtime": 1.2547, |
|
"eval_samples_per_second": 35.867, |
|
"eval_steps_per_second": 2.391, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 282.61, |
|
"learning_rate": 1.751824817518248e-05, |
|
"loss": 0.0107, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 284.78, |
|
"learning_rate": 1.532846715328467e-05, |
|
"loss": 0.0105, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 286.96, |
|
"learning_rate": 1.313868613138686e-05, |
|
"loss": 0.0086, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 286.96, |
|
"eval_cer": 0.3215613382899628, |
|
"eval_loss": 2.607461452484131, |
|
"eval_runtime": 1.204, |
|
"eval_samples_per_second": 37.374, |
|
"eval_steps_per_second": 2.492, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 289.13, |
|
"learning_rate": 1.0948905109489049e-05, |
|
"loss": 0.0095, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 291.3, |
|
"learning_rate": 8.75912408759124e-06, |
|
"loss": 0.0108, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 293.48, |
|
"learning_rate": 6.56934306569343e-06, |
|
"loss": 0.0106, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 293.48, |
|
"eval_cer": 0.3246592317224288, |
|
"eval_loss": 2.595982789993286, |
|
"eval_runtime": 1.1323, |
|
"eval_samples_per_second": 39.741, |
|
"eval_steps_per_second": 2.649, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 295.65, |
|
"learning_rate": 4.37956204379562e-06, |
|
"loss": 0.0143, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 297.83, |
|
"learning_rate": 2.18978102189781e-06, |
|
"loss": 0.0105, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0085, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_cer": 0.32403965303593557, |
|
"eval_loss": 2.5951595306396484, |
|
"eval_runtime": 1.2068, |
|
"eval_samples_per_second": 37.288, |
|
"eval_steps_per_second": 2.486, |
|
"step": 6900 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 6900, |
|
"num_train_epochs": 300, |
|
"save_steps": 150, |
|
"total_flos": 2.3112928880616276e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |