|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 35.0,
  "eval_steps": 500,
  "global_step": 875,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 6.834115982055664,
      "learning_rate": 4.874285714285715e-05,
      "loss": 3.1488,
      "step": 25
    },
    {
      "epoch": 1.0,
      "eval_cer": 0.37662337662337664,
      "eval_loss": 0.7865502834320068,
      "eval_runtime": 8.8116,
      "eval_samples_per_second": 2.27,
      "eval_steps_per_second": 0.34,
      "step": 25
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.263820171356201,
      "learning_rate": 4.7314285714285714e-05,
      "loss": 0.8093,
      "step": 50
    },
    {
      "epoch": 2.0,
      "eval_cer": 0.4675324675324675,
      "eval_loss": 0.8947464823722839,
      "eval_runtime": 2.7975,
      "eval_samples_per_second": 7.149,
      "eval_steps_per_second": 1.072,
      "step": 50
    },
    {
      "epoch": 3.0,
      "grad_norm": 5.471518039703369,
      "learning_rate": 4.588571428571429e-05,
      "loss": 0.724,
      "step": 75
    },
    {
      "epoch": 3.0,
      "eval_cer": 0.4155844155844156,
      "eval_loss": 0.8557536005973816,
      "eval_runtime": 2.7561,
      "eval_samples_per_second": 7.257,
      "eval_steps_per_second": 1.088,
      "step": 75
    },
    {
      "epoch": 4.0,
      "grad_norm": 6.84343957901001,
      "learning_rate": 4.445714285714286e-05,
      "loss": 0.7405,
      "step": 100
    },
    {
      "epoch": 4.0,
      "eval_cer": 0.33766233766233766,
      "eval_loss": 0.8277163505554199,
      "eval_runtime": 2.8432,
      "eval_samples_per_second": 7.034,
      "eval_steps_per_second": 1.055,
      "step": 100
    },
    {
      "epoch": 5.0,
      "grad_norm": 4.826574802398682,
      "learning_rate": 4.302857142857143e-05,
      "loss": 0.7361,
      "step": 125
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.36363636363636365,
      "eval_loss": 0.8111216425895691,
      "eval_runtime": 2.8495,
      "eval_samples_per_second": 7.019,
      "eval_steps_per_second": 1.053,
      "step": 125
    },
    {
      "epoch": 6.0,
      "grad_norm": 4.757009983062744,
      "learning_rate": 4.16e-05,
      "loss": 0.7061,
      "step": 150
    },
    {
      "epoch": 6.0,
      "eval_cer": 0.37662337662337664,
      "eval_loss": 0.7869578003883362,
      "eval_runtime": 2.8778,
      "eval_samples_per_second": 6.95,
      "eval_steps_per_second": 1.042,
      "step": 150
    },
    {
      "epoch": 7.0,
      "grad_norm": 4.829946517944336,
      "learning_rate": 4.017142857142857e-05,
      "loss": 0.7159,
      "step": 175
    },
    {
      "epoch": 7.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8124594688415527,
      "eval_runtime": 2.9455,
      "eval_samples_per_second": 6.79,
      "eval_steps_per_second": 1.019,
      "step": 175
    },
    {
      "epoch": 8.0,
      "grad_norm": 3.9984848499298096,
      "learning_rate": 3.874285714285715e-05,
      "loss": 0.707,
      "step": 200
    },
    {
      "epoch": 8.0,
      "eval_cer": 0.37662337662337664,
      "eval_loss": 0.8081725239753723,
      "eval_runtime": 2.8597,
      "eval_samples_per_second": 6.994,
      "eval_steps_per_second": 1.049,
      "step": 200
    },
    {
      "epoch": 9.0,
      "grad_norm": 3.7987842559814453,
      "learning_rate": 3.7314285714285715e-05,
      "loss": 0.7073,
      "step": 225
    },
    {
      "epoch": 9.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8078359961509705,
      "eval_runtime": 2.8752,
      "eval_samples_per_second": 6.956,
      "eval_steps_per_second": 1.043,
      "step": 225
    },
    {
      "epoch": 10.0,
      "grad_norm": 4.148154258728027,
      "learning_rate": 3.588571428571429e-05,
      "loss": 0.6956,
      "step": 250
    },
    {
      "epoch": 10.0,
      "eval_cer": 0.37662337662337664,
      "eval_loss": 0.8172693848609924,
      "eval_runtime": 2.8623,
      "eval_samples_per_second": 6.987,
      "eval_steps_per_second": 1.048,
      "step": 250
    },
    {
      "epoch": 11.0,
      "grad_norm": 3.5226118564605713,
      "learning_rate": 3.445714285714286e-05,
      "loss": 0.7018,
      "step": 275
    },
    {
      "epoch": 11.0,
      "eval_cer": 0.37662337662337664,
      "eval_loss": 0.8221365809440613,
      "eval_runtime": 2.8711,
      "eval_samples_per_second": 6.966,
      "eval_steps_per_second": 1.045,
      "step": 275
    },
    {
      "epoch": 12.0,
      "grad_norm": 3.736212730407715,
      "learning_rate": 3.302857142857143e-05,
      "loss": 0.6901,
      "step": 300
    },
    {
      "epoch": 12.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8348967432975769,
      "eval_runtime": 2.9677,
      "eval_samples_per_second": 6.739,
      "eval_steps_per_second": 1.011,
      "step": 300
    },
    {
      "epoch": 13.0,
      "grad_norm": 3.468735456466675,
      "learning_rate": 3.16e-05,
      "loss": 0.6862,
      "step": 325
    },
    {
      "epoch": 13.0,
      "eval_cer": 0.36363636363636365,
      "eval_loss": 0.7819182276725769,
      "eval_runtime": 2.8624,
      "eval_samples_per_second": 6.987,
      "eval_steps_per_second": 1.048,
      "step": 325
    },
    {
      "epoch": 14.0,
      "grad_norm": 4.044895648956299,
      "learning_rate": 3.0171428571428572e-05,
      "loss": 0.7024,
      "step": 350
    },
    {
      "epoch": 14.0,
      "eval_cer": 0.36363636363636365,
      "eval_loss": 0.7919606566429138,
      "eval_runtime": 2.907,
      "eval_samples_per_second": 6.88,
      "eval_steps_per_second": 1.032,
      "step": 350
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.781301259994507,
      "learning_rate": 2.8742857142857143e-05,
      "loss": 0.6858,
      "step": 375
    },
    {
      "epoch": 15.0,
      "eval_cer": 0.36363636363636365,
      "eval_loss": 0.812893807888031,
      "eval_runtime": 2.8775,
      "eval_samples_per_second": 6.95,
      "eval_steps_per_second": 1.043,
      "step": 375
    },
    {
      "epoch": 16.0,
      "grad_norm": 3.412642240524292,
      "learning_rate": 2.7314285714285716e-05,
      "loss": 0.6782,
      "step": 400
    },
    {
      "epoch": 16.0,
      "eval_cer": 0.4025974025974026,
      "eval_loss": 0.8198434710502625,
      "eval_runtime": 2.89,
      "eval_samples_per_second": 6.92,
      "eval_steps_per_second": 1.038,
      "step": 400
    },
    {
      "epoch": 17.0,
      "grad_norm": 2.96364426612854,
      "learning_rate": 2.5885714285714286e-05,
      "loss": 0.675,
      "step": 425
    },
    {
      "epoch": 17.0,
      "eval_cer": 0.44155844155844154,
      "eval_loss": 0.8002452850341797,
      "eval_runtime": 2.9999,
      "eval_samples_per_second": 6.667,
      "eval_steps_per_second": 1.0,
      "step": 425
    },
    {
      "epoch": 18.0,
      "grad_norm": 3.237696647644043,
      "learning_rate": 2.445714285714286e-05,
      "loss": 0.6749,
      "step": 450
    },
    {
      "epoch": 18.0,
      "eval_cer": 0.36363636363636365,
      "eval_loss": 0.7925994396209717,
      "eval_runtime": 2.9067,
      "eval_samples_per_second": 6.881,
      "eval_steps_per_second": 1.032,
      "step": 450
    },
    {
      "epoch": 19.0,
      "grad_norm": 3.5985169410705566,
      "learning_rate": 2.302857142857143e-05,
      "loss": 0.6723,
      "step": 475
    },
    {
      "epoch": 19.0,
      "eval_cer": 0.4025974025974026,
      "eval_loss": 0.8083222508430481,
      "eval_runtime": 2.8862,
      "eval_samples_per_second": 6.93,
      "eval_steps_per_second": 1.039,
      "step": 475
    },
    {
      "epoch": 20.0,
      "grad_norm": 3.7446236610412598,
      "learning_rate": 2.16e-05,
      "loss": 0.6736,
      "step": 500
    },
    {
      "epoch": 20.0,
      "eval_cer": 0.42857142857142855,
      "eval_loss": 0.832375705242157,
      "eval_runtime": 2.9714,
      "eval_samples_per_second": 6.731,
      "eval_steps_per_second": 1.01,
      "step": 500
    },
    {
      "epoch": 21.0,
      "grad_norm": 3.926819324493408,
      "learning_rate": 2.0171428571428573e-05,
      "loss": 0.6646,
      "step": 525
    },
    {
      "epoch": 21.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8589693903923035,
      "eval_runtime": 2.9745,
      "eval_samples_per_second": 6.724,
      "eval_steps_per_second": 1.009,
      "step": 525
    },
    {
      "epoch": 22.0,
      "grad_norm": 3.9272310733795166,
      "learning_rate": 1.8742857142857143e-05,
      "loss": 0.6586,
      "step": 550
    },
    {
      "epoch": 22.0,
      "eval_cer": 0.36363636363636365,
      "eval_loss": 0.803028404712677,
      "eval_runtime": 2.9162,
      "eval_samples_per_second": 6.858,
      "eval_steps_per_second": 1.029,
      "step": 550
    },
    {
      "epoch": 23.0,
      "grad_norm": 3.406860113143921,
      "learning_rate": 1.7314285714285717e-05,
      "loss": 0.6665,
      "step": 575
    },
    {
      "epoch": 23.0,
      "eval_cer": 0.4675324675324675,
      "eval_loss": 0.8490405082702637,
      "eval_runtime": 2.8831,
      "eval_samples_per_second": 6.937,
      "eval_steps_per_second": 1.041,
      "step": 575
    },
    {
      "epoch": 24.0,
      "grad_norm": 3.1726770401000977,
      "learning_rate": 1.5885714285714287e-05,
      "loss": 0.655,
      "step": 600
    },
    {
      "epoch": 24.0,
      "eval_cer": 0.4675324675324675,
      "eval_loss": 0.8455414772033691,
      "eval_runtime": 2.9414,
      "eval_samples_per_second": 6.799,
      "eval_steps_per_second": 1.02,
      "step": 600
    },
    {
      "epoch": 25.0,
      "grad_norm": 3.506122589111328,
      "learning_rate": 1.4457142857142857e-05,
      "loss": 0.6641,
      "step": 625
    },
    {
      "epoch": 25.0,
      "eval_cer": 0.42857142857142855,
      "eval_loss": 0.8133634924888611,
      "eval_runtime": 3.0374,
      "eval_samples_per_second": 6.584,
      "eval_steps_per_second": 0.988,
      "step": 625
    },
    {
      "epoch": 26.0,
      "grad_norm": 4.370969772338867,
      "learning_rate": 1.3028571428571429e-05,
      "loss": 0.6597,
      "step": 650
    },
    {
      "epoch": 26.0,
      "eval_cer": 0.4675324675324675,
      "eval_loss": 0.8322270512580872,
      "eval_runtime": 2.8213,
      "eval_samples_per_second": 7.089,
      "eval_steps_per_second": 1.063,
      "step": 650
    },
    {
      "epoch": 27.0,
      "grad_norm": 3.851907253265381,
      "learning_rate": 1.16e-05,
      "loss": 0.6611,
      "step": 675
    },
    {
      "epoch": 27.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8302900195121765,
      "eval_runtime": 2.8659,
      "eval_samples_per_second": 6.979,
      "eval_steps_per_second": 1.047,
      "step": 675
    },
    {
      "epoch": 28.0,
      "grad_norm": 2.989511013031006,
      "learning_rate": 1.0171428571428573e-05,
      "loss": 0.6617,
      "step": 700
    },
    {
      "epoch": 28.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8202476501464844,
      "eval_runtime": 2.9062,
      "eval_samples_per_second": 6.882,
      "eval_steps_per_second": 1.032,
      "step": 700
    },
    {
      "epoch": 29.0,
      "grad_norm": 3.977627992630005,
      "learning_rate": 8.742857142857143e-06,
      "loss": 0.6531,
      "step": 725
    },
    {
      "epoch": 29.0,
      "eval_cer": 0.45454545454545453,
      "eval_loss": 0.8274255394935608,
      "eval_runtime": 2.8627,
      "eval_samples_per_second": 6.987,
      "eval_steps_per_second": 1.048,
      "step": 725
    },
    {
      "epoch": 30.0,
      "grad_norm": 3.391641855239868,
      "learning_rate": 7.314285714285715e-06,
      "loss": 0.6491,
      "step": 750
    },
    {
      "epoch": 30.0,
      "eval_cer": 0.42857142857142855,
      "eval_loss": 0.8236328959465027,
      "eval_runtime": 2.8678,
      "eval_samples_per_second": 6.974,
      "eval_steps_per_second": 1.046,
      "step": 750
    },
    {
      "epoch": 31.0,
      "grad_norm": 3.7944726943969727,
      "learning_rate": 5.885714285714286e-06,
      "loss": 0.6471,
      "step": 775
    },
    {
      "epoch": 31.0,
      "eval_cer": 0.4155844155844156,
      "eval_loss": 0.8234376907348633,
      "eval_runtime": 2.9962,
      "eval_samples_per_second": 6.675,
      "eval_steps_per_second": 1.001,
      "step": 775
    },
    {
      "epoch": 32.0,
      "grad_norm": 4.810728073120117,
      "learning_rate": 4.457142857142858e-06,
      "loss": 0.6446,
      "step": 800
    },
    {
      "epoch": 32.0,
      "eval_cer": 0.42857142857142855,
      "eval_loss": 0.83391934633255,
      "eval_runtime": 2.8811,
      "eval_samples_per_second": 6.942,
      "eval_steps_per_second": 1.041,
      "step": 800
    },
    {
      "epoch": 33.0,
      "grad_norm": 3.008390188217163,
      "learning_rate": 3.028571428571429e-06,
      "loss": 0.6439,
      "step": 825
    },
    {
      "epoch": 33.0,
      "eval_cer": 0.4025974025974026,
      "eval_loss": 0.8176433444023132,
      "eval_runtime": 2.8474,
      "eval_samples_per_second": 7.024,
      "eval_steps_per_second": 1.054,
      "step": 825
    },
    {
      "epoch": 34.0,
      "grad_norm": 4.271653652191162,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.6453,
      "step": 850
    },
    {
      "epoch": 34.0,
      "eval_cer": 0.4025974025974026,
      "eval_loss": 0.8156414031982422,
      "eval_runtime": 2.8852,
      "eval_samples_per_second": 6.932,
      "eval_steps_per_second": 1.04,
      "step": 850
    },
    {
      "epoch": 35.0,
      "grad_norm": 4.2927045822143555,
      "learning_rate": 1.7142857142857143e-07,
      "loss": 0.6404,
      "step": 875
    },
    {
      "epoch": 35.0,
      "eval_cer": 0.35064935064935066,
      "eval_loss": 0.8252768516540527,
      "eval_runtime": 2.8782,
      "eval_samples_per_second": 6.949,
      "eval_steps_per_second": 1.042,
      "step": 875
    }
  ],
  "logging_steps": 500,
  "max_steps": 875,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 35,
  "save_steps": 500,
  "total_flos": 5.237996336971776e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}