|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.6775106082036775, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14144271570014144, |
|
"eval_loss": 3.3999907970428467, |
|
"eval_runtime": 162.0918, |
|
"eval_samples_per_second": 34.894, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2828854314002829, |
|
"eval_loss": 2.9512617588043213, |
|
"eval_runtime": 159.7591, |
|
"eval_samples_per_second": 35.403, |
|
"eval_steps_per_second": 4.425, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 1.9453805685043335, |
|
"eval_runtime": 160.8922, |
|
"eval_samples_per_second": 35.154, |
|
"eval_steps_per_second": 4.394, |
|
"eval_wer": 0.9325480252282904, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"eval_loss": 1.1412426233291626, |
|
"eval_runtime": 161.9235, |
|
"eval_samples_per_second": 34.93, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.773025629503619, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 2.79933500289917, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.2243, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"eval_loss": 0.9250076413154602, |
|
"eval_runtime": 160.568, |
|
"eval_samples_per_second": 35.225, |
|
"eval_steps_per_second": 4.403, |
|
"eval_wer": 0.67890099661376, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 0.8018025159835815, |
|
"eval_runtime": 160.6715, |
|
"eval_samples_per_second": 35.202, |
|
"eval_steps_per_second": 4.4, |
|
"eval_wer": 0.5982892266213028, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"eval_loss": 0.7181887030601501, |
|
"eval_runtime": 160.5511, |
|
"eval_samples_per_second": 35.229, |
|
"eval_steps_per_second": 4.404, |
|
"eval_wer": 0.5408675835727239, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"eval_loss": 0.6198393106460571, |
|
"eval_runtime": 161.1454, |
|
"eval_samples_per_second": 35.099, |
|
"eval_steps_per_second": 4.387, |
|
"eval_wer": 0.4769944311598273, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 0.6102356910705566, |
|
"eval_runtime": 160.7517, |
|
"eval_samples_per_second": 35.185, |
|
"eval_steps_per_second": 4.398, |
|
"eval_wer": 0.47124905714881804, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 1.0946769714355469, |
|
"learning_rate": 0.00022928571428571426, |
|
"loss": 0.7983, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"eval_loss": 0.5605342984199524, |
|
"eval_runtime": 160.7985, |
|
"eval_samples_per_second": 35.174, |
|
"eval_steps_per_second": 4.397, |
|
"eval_wer": 0.4425863812167996, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5558698727015559, |
|
"eval_loss": 0.5336170196533203, |
|
"eval_runtime": 160.9878, |
|
"eval_samples_per_second": 35.133, |
|
"eval_steps_per_second": 4.392, |
|
"eval_wer": 0.4162186451830335, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 0.5258753299713135, |
|
"eval_runtime": 161.623, |
|
"eval_samples_per_second": 34.995, |
|
"eval_steps_per_second": 4.374, |
|
"eval_wer": 0.41161271685577183, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8387553041018387, |
|
"eval_loss": 0.4960057735443115, |
|
"eval_runtime": 161.2649, |
|
"eval_samples_per_second": 35.073, |
|
"eval_steps_per_second": 4.384, |
|
"eval_wer": 0.3872349986358749, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"eval_loss": 0.48569169640541077, |
|
"eval_runtime": 161.7331, |
|
"eval_samples_per_second": 34.971, |
|
"eval_steps_per_second": 4.371, |
|
"eval_wer": 0.38676959124392163, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 1.4740066528320312, |
|
"learning_rate": 0.0001582857142857143, |
|
"loss": 0.6274, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 0.46894121170043945, |
|
"eval_runtime": 161.6698, |
|
"eval_samples_per_second": 34.985, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.36555343358315545, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"eval_loss": 0.4679875373840332, |
|
"eval_runtime": 161.7805, |
|
"eval_samples_per_second": 34.961, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.3562131886825761, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4045261669024045, |
|
"eval_loss": 0.4536493718624115, |
|
"eval_runtime": 162.2799, |
|
"eval_samples_per_second": 34.853, |
|
"eval_steps_per_second": 4.357, |
|
"eval_wer": 0.3535330840461556, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 0.44862428307533264, |
|
"eval_runtime": 161.7828, |
|
"eval_samples_per_second": 34.96, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.3500826499333986, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6874115983026874, |
|
"eval_loss": 0.43955981731414795, |
|
"eval_runtime": 162.5762, |
|
"eval_samples_per_second": 34.79, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.35051596026383786, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 0.927208662033081, |
|
"learning_rate": 8.728571428571428e-05, |
|
"loss": 0.4939, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"eval_loss": 0.4298574924468994, |
|
"eval_runtime": 162.5099, |
|
"eval_samples_per_second": 34.804, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.3340020221148754, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 0.4291674494743347, |
|
"eval_runtime": 162.0566, |
|
"eval_samples_per_second": 34.901, |
|
"eval_steps_per_second": 4.363, |
|
"eval_wer": 0.33114538364012774, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1117397454031117, |
|
"eval_loss": 0.42761147022247314, |
|
"eval_runtime": 162.7807, |
|
"eval_samples_per_second": 34.746, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.3270690568278474, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.253182461103253, |
|
"eval_loss": 0.4232628345489502, |
|
"eval_runtime": 161.9234, |
|
"eval_samples_per_second": 34.93, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.3260419508593988, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 0.4192351996898651, |
|
"eval_runtime": 162.4093, |
|
"eval_samples_per_second": 34.826, |
|
"eval_steps_per_second": 4.353, |
|
"eval_wer": 0.32230264319301566, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.7725916504859924, |
|
"learning_rate": 1.614285714285714e-05, |
|
"loss": 0.4072, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"eval_loss": 0.4178767800331116, |
|
"eval_runtime": 162.2884, |
|
"eval_samples_per_second": 34.852, |
|
"eval_steps_per_second": 4.356, |
|
"eval_wer": 0.31946205324902505, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"eval_loss": 0.4158582091331482, |
|
"eval_runtime": 162.2061, |
|
"eval_samples_per_second": 34.869, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.3171350162892587, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"step": 2600, |
|
"total_flos": 9.931497341662648e+18, |
|
"train_loss": 1.0819015429570125, |
|
"train_runtime": 7138.5232, |
|
"train_samples_per_second": 11.655, |
|
"train_steps_per_second": 0.364 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 9.931497341662648e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|