|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 330, |
|
"global_step": 9870, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0003, |
|
"loss": 33.5779, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00029537037037037037, |
|
"loss": 6.0153, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_cer": 0.9522337427844499, |
|
"eval_loss": 5.343827724456787, |
|
"eval_runtime": 251.7628, |
|
"eval_samples_per_second": 10.573, |
|
"eval_steps_per_second": 0.663, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002907407407407407, |
|
"loss": 5.513, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002861111111111111, |
|
"loss": 5.3776, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.9409077230323093, |
|
"eval_loss": 5.153414726257324, |
|
"eval_runtime": 250.9411, |
|
"eval_samples_per_second": 10.608, |
|
"eval_steps_per_second": 0.665, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00028148148148148146, |
|
"loss": 5.3202, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0002768518518518518, |
|
"loss": 5.2604, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_cer": 0.9108417913714333, |
|
"eval_loss": 5.083229064941406, |
|
"eval_runtime": 252.1577, |
|
"eval_samples_per_second": 10.557, |
|
"eval_steps_per_second": 0.662, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002722222222222222, |
|
"loss": 5.2287, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00026759259259259255, |
|
"loss": 5.2393, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_cer": 0.9073126692747517, |
|
"eval_loss": 5.065478324890137, |
|
"eval_runtime": 251.9951, |
|
"eval_samples_per_second": 10.564, |
|
"eval_steps_per_second": 0.663, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00026296296296296294, |
|
"loss": 5.2058, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00025833333333333334, |
|
"loss": 5.1796, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0002537037037037037, |
|
"loss": 5.1721, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_cer": 0.9000355647963232, |
|
"eval_loss": 5.046383380889893, |
|
"eval_runtime": 251.3928, |
|
"eval_samples_per_second": 10.589, |
|
"eval_steps_per_second": 0.664, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0002490740740740741, |
|
"loss": 5.1736, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00024444444444444443, |
|
"loss": 5.1619, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_cer": 0.9044674855689, |
|
"eval_loss": 5.024378776550293, |
|
"eval_runtime": 248.9825, |
|
"eval_samples_per_second": 10.692, |
|
"eval_steps_per_second": 0.671, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0002398148148148148, |
|
"loss": 5.1484, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00023518518518518517, |
|
"loss": 5.1308, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_cer": 0.9020326648975461, |
|
"eval_loss": 5.021634578704834, |
|
"eval_runtime": 250.5536, |
|
"eval_samples_per_second": 10.624, |
|
"eval_steps_per_second": 0.667, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.00023055555555555552, |
|
"loss": 5.0855, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00022592592592592591, |
|
"loss": 5.0971, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_cer": 0.9040297649987689, |
|
"eval_loss": 4.9340667724609375, |
|
"eval_runtime": 248.8115, |
|
"eval_samples_per_second": 10.699, |
|
"eval_steps_per_second": 0.671, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00022129629629629626, |
|
"loss": 5.0599, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00021666666666666666, |
|
"loss": 5.0137, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_cer": 0.9143709134681148, |
|
"eval_loss": 4.879497051239014, |
|
"eval_runtime": 248.574, |
|
"eval_samples_per_second": 10.709, |
|
"eval_steps_per_second": 0.672, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.00021203703703703703, |
|
"loss": 4.9809, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00020740740740740737, |
|
"loss": 4.9939, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00020277777777777777, |
|
"loss": 4.9341, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_cer": 0.9039476923918693, |
|
"eval_loss": 4.725036144256592, |
|
"eval_runtime": 250.9019, |
|
"eval_samples_per_second": 10.61, |
|
"eval_steps_per_second": 0.666, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00019814814814814814, |
|
"loss": 4.8114, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00019351851851851849, |
|
"loss": 4.6832, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_cer": 0.8367302273411211, |
|
"eval_loss": 4.214047908782959, |
|
"eval_runtime": 249.955, |
|
"eval_samples_per_second": 10.65, |
|
"eval_steps_per_second": 0.668, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00018888888888888888, |
|
"loss": 4.4588, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00018425925925925923, |
|
"loss": 4.1627, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_cer": 0.7318140781878368, |
|
"eval_loss": 3.4010486602783203, |
|
"eval_runtime": 249.3206, |
|
"eval_samples_per_second": 10.677, |
|
"eval_steps_per_second": 0.67, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0001796296296296296, |
|
"loss": 3.7597, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.000175, |
|
"loss": 3.5448, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_cer": 0.6479905890077422, |
|
"eval_loss": 2.882997989654541, |
|
"eval_runtime": 248.069, |
|
"eval_samples_per_second": 10.731, |
|
"eval_steps_per_second": 0.673, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00017037037037037034, |
|
"loss": 3.3922, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00016574074074074074, |
|
"loss": 3.2576, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_cer": 0.6265696386069542, |
|
"eval_loss": 2.6253392696380615, |
|
"eval_runtime": 244.3615, |
|
"eval_samples_per_second": 10.894, |
|
"eval_steps_per_second": 0.683, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0001611111111111111, |
|
"loss": 3.0846, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00015648148148148146, |
|
"loss": 2.9344, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00015185185185185185, |
|
"loss": 2.8561, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"eval_cer": 0.5866002790468634, |
|
"eval_loss": 2.430042266845703, |
|
"eval_runtime": 245.691, |
|
"eval_samples_per_second": 10.835, |
|
"eval_steps_per_second": 0.68, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00014722222222222223, |
|
"loss": 2.8167, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.00014259259259259257, |
|
"loss": 2.7894, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_cer": 0.575028041474024, |
|
"eval_loss": 2.2997841835021973, |
|
"eval_runtime": 245.9682, |
|
"eval_samples_per_second": 10.823, |
|
"eval_steps_per_second": 0.679, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.00013796296296296294, |
|
"loss": 2.6472, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0001333333333333333, |
|
"loss": 2.6018, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"eval_cer": 0.554892895247996, |
|
"eval_loss": 2.187838554382324, |
|
"eval_runtime": 244.7687, |
|
"eval_samples_per_second": 10.876, |
|
"eval_steps_per_second": 0.682, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.0001287037037037037, |
|
"loss": 2.5751, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.00012407407407407406, |
|
"loss": 2.546, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_cer": 0.5350860394495663, |
|
"eval_loss": 2.1450469493865967, |
|
"eval_runtime": 244.8384, |
|
"eval_samples_per_second": 10.872, |
|
"eval_steps_per_second": 0.682, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.00011944444444444443, |
|
"loss": 2.4555, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 0.0001148148148148148, |
|
"loss": 2.3787, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_cer": 0.5339917380242388, |
|
"eval_loss": 2.102729558944702, |
|
"eval_runtime": 244.3347, |
|
"eval_samples_per_second": 10.895, |
|
"eval_steps_per_second": 0.683, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.00011018518518518518, |
|
"loss": 2.3783, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.00010555555555555555, |
|
"loss": 2.3806, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.00010092592592592591, |
|
"loss": 2.335, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"eval_cer": 0.5165923453615299, |
|
"eval_loss": 2.0303709506988525, |
|
"eval_runtime": 244.6547, |
|
"eval_samples_per_second": 10.881, |
|
"eval_steps_per_second": 0.683, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 9.629629629629628e-05, |
|
"loss": 2.2373, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 2.2138, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_cer": 0.5164555576833639, |
|
"eval_loss": 2.0100014209747314, |
|
"eval_runtime": 244.9943, |
|
"eval_samples_per_second": 10.866, |
|
"eval_steps_per_second": 0.682, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 8.703703703703704e-05, |
|
"loss": 2.2174, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 8.24074074074074e-05, |
|
"loss": 2.2381, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"eval_cer": 0.5031050802943671, |
|
"eval_loss": 1.9650695323944092, |
|
"eval_runtime": 244.4808, |
|
"eval_samples_per_second": 10.888, |
|
"eval_steps_per_second": 0.683, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 7.777777777777777e-05, |
|
"loss": 2.155, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 7.314814814814814e-05, |
|
"loss": 2.1108, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"eval_cer": 0.5034880857932318, |
|
"eval_loss": 1.9666314125061035, |
|
"eval_runtime": 244.872, |
|
"eval_samples_per_second": 10.871, |
|
"eval_steps_per_second": 0.682, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 6.851851851851851e-05, |
|
"loss": 2.1221, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 6.388888888888888e-05, |
|
"loss": 2.0916, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"eval_cer": 0.49982217601838425, |
|
"eval_loss": 1.913594365119934, |
|
"eval_runtime": 244.907, |
|
"eval_samples_per_second": 10.869, |
|
"eval_steps_per_second": 0.682, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 5.925925925925925e-05, |
|
"loss": 2.0517, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 5.4629629629629624e-05, |
|
"loss": 2.0279, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 4.9999999999999996e-05, |
|
"loss": 2.0229, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"eval_cer": 0.5027767898667688, |
|
"eval_loss": 1.898772120475769, |
|
"eval_runtime": 244.3507, |
|
"eval_samples_per_second": 10.894, |
|
"eval_steps_per_second": 0.683, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 4.537037037037037e-05, |
|
"loss": 1.9891, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 2.0056, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_cer": 0.4995759581976856, |
|
"eval_loss": 1.8768519163131714, |
|
"eval_runtime": 244.2609, |
|
"eval_samples_per_second": 10.898, |
|
"eval_steps_per_second": 0.684, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 3.61111111111111e-05, |
|
"loss": 1.9451, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 3.1481481481481474e-05, |
|
"loss": 1.9245, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_cer": 0.495472327852707, |
|
"eval_loss": 1.8715523481369019, |
|
"eval_runtime": 244.453, |
|
"eval_samples_per_second": 10.89, |
|
"eval_steps_per_second": 0.683, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 2.685185185185185e-05, |
|
"loss": 1.916, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 2.222222222222222e-05, |
|
"loss": 1.9378, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"eval_cer": 0.49459688671244495, |
|
"eval_loss": 1.8560909032821655, |
|
"eval_runtime": 244.76, |
|
"eval_samples_per_second": 10.876, |
|
"eval_steps_per_second": 0.682, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 1.759259259259259e-05, |
|
"loss": 1.9169, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 1.296296296296296e-05, |
|
"loss": 1.9003, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"eval_cer": 0.49363937296528326, |
|
"eval_loss": 1.848546028137207, |
|
"eval_runtime": 244.6555, |
|
"eval_samples_per_second": 10.881, |
|
"eval_steps_per_second": 0.683, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 8.333333333333332e-06, |
|
"loss": 1.8611, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 3.7037037037037033e-06, |
|
"loss": 1.8698, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 9870, |
|
"total_flos": 3.0352534937393955e+19, |
|
"train_loss": 3.8899900426739014, |
|
"train_runtime": 13723.3522, |
|
"train_samples_per_second": 23.006, |
|
"train_steps_per_second": 0.719 |
|
} |
|
], |
|
"logging_steps": 150, |
|
"max_steps": 9870, |
|
"num_train_epochs": 15, |
|
"save_steps": 330, |
|
"total_flos": 3.0352534937393955e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|