|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.536067892503536, |
|
"eval_steps": 100, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14144271570014144, |
|
"eval_loss": 3.7463672161102295, |
|
"eval_runtime": 153.5435, |
|
"eval_samples_per_second": 36.836, |
|
"eval_steps_per_second": 4.605, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2828854314002829, |
|
"eval_loss": 2.9399216175079346, |
|
"eval_runtime": 150.7703, |
|
"eval_samples_per_second": 37.514, |
|
"eval_steps_per_second": 4.689, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 2.5961458683013916, |
|
"eval_runtime": 151.4236, |
|
"eval_samples_per_second": 37.352, |
|
"eval_steps_per_second": 4.669, |
|
"eval_wer": 0.9991041576682503, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"eval_loss": 1.1618728637695312, |
|
"eval_runtime": 152.5862, |
|
"eval_samples_per_second": 37.068, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.7905328662155461, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 1.7047498226165771, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.5448, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"eval_loss": 0.946560800075531, |
|
"eval_runtime": 153.1427, |
|
"eval_samples_per_second": 36.933, |
|
"eval_steps_per_second": 4.617, |
|
"eval_wer": 0.6897506038937147, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 0.7894724607467651, |
|
"eval_runtime": 151.7162, |
|
"eval_samples_per_second": 37.28, |
|
"eval_steps_per_second": 4.66, |
|
"eval_wer": 0.6110604533602086, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"eval_loss": 0.6820164918899536, |
|
"eval_runtime": 152.0086, |
|
"eval_samples_per_second": 37.208, |
|
"eval_steps_per_second": 4.651, |
|
"eval_wer": 0.5378893314776599, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"eval_loss": 0.6039016842842102, |
|
"eval_runtime": 152.002, |
|
"eval_samples_per_second": 37.21, |
|
"eval_steps_per_second": 4.651, |
|
"eval_wer": 0.47239685815296506, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 0.5631398558616638, |
|
"eval_runtime": 153.0321, |
|
"eval_samples_per_second": 36.96, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.46745372814384667, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 0.8790757656097412, |
|
"learning_rate": 0.00022574999999999996, |
|
"loss": 0.7808, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"eval_loss": 0.5279428958892822, |
|
"eval_runtime": 152.9768, |
|
"eval_samples_per_second": 36.973, |
|
"eval_steps_per_second": 4.622, |
|
"eval_wer": 0.4291084769080642, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5558698727015559, |
|
"eval_loss": 0.5024306178092957, |
|
"eval_runtime": 152.8927, |
|
"eval_samples_per_second": 36.993, |
|
"eval_steps_per_second": 4.624, |
|
"eval_wer": 0.39940170529986724, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 0.4894837439060211, |
|
"eval_runtime": 153.3103, |
|
"eval_samples_per_second": 36.892, |
|
"eval_steps_per_second": 4.612, |
|
"eval_wer": 0.3894514565436483, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8387553041018387, |
|
"eval_loss": 0.4595918357372284, |
|
"eval_runtime": 153.0651, |
|
"eval_samples_per_second": 36.952, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.3695829534002016, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"eval_loss": 0.44729524850845337, |
|
"eval_runtime": 154.1998, |
|
"eval_samples_per_second": 36.68, |
|
"eval_steps_per_second": 4.585, |
|
"eval_wer": 0.3610884484330758, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 0.8205087184906006, |
|
"learning_rate": 0.0001512, |
|
"loss": 0.6005, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 0.43324384093284607, |
|
"eval_runtime": 150.9169, |
|
"eval_samples_per_second": 37.478, |
|
"eval_steps_per_second": 4.685, |
|
"eval_wer": 0.3474268528738942, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"eval_loss": 0.4268616735935211, |
|
"eval_runtime": 152.3911, |
|
"eval_samples_per_second": 37.115, |
|
"eval_steps_per_second": 4.639, |
|
"eval_wer": 0.3418118411159636, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4045261669024045, |
|
"eval_loss": 0.4155045449733734, |
|
"eval_runtime": 153.4832, |
|
"eval_samples_per_second": 36.851, |
|
"eval_steps_per_second": 4.606, |
|
"eval_wer": 0.33606885188206875, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 0.4121190905570984, |
|
"eval_runtime": 153.7529, |
|
"eval_samples_per_second": 36.786, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.32143142806865993, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6874115983026874, |
|
"eval_loss": 0.4145391285419464, |
|
"eval_runtime": 159.3319, |
|
"eval_samples_per_second": 35.498, |
|
"eval_steps_per_second": 4.437, |
|
"eval_wer": 0.3366447505239078, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 0.8615767359733582, |
|
"learning_rate": 7.664999999999999e-05, |
|
"loss": 0.4666, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"eval_loss": 0.39387884736061096, |
|
"eval_runtime": 153.1343, |
|
"eval_samples_per_second": 36.935, |
|
"eval_steps_per_second": 4.617, |
|
"eval_wer": 0.3114171905744589, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 0.38894009590148926, |
|
"eval_runtime": 152.8789, |
|
"eval_samples_per_second": 36.997, |
|
"eval_steps_per_second": 4.625, |
|
"eval_wer": 0.30807377901489336, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1117397454031117, |
|
"eval_loss": 0.3909347653388977, |
|
"eval_runtime": 154.4919, |
|
"eval_samples_per_second": 36.61, |
|
"eval_steps_per_second": 4.576, |
|
"eval_wer": 0.30644206619634945, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.253182461103253, |
|
"eval_loss": 0.3874327838420868, |
|
"eval_runtime": 153.3025, |
|
"eval_samples_per_second": 36.894, |
|
"eval_steps_per_second": 4.612, |
|
"eval_wer": 0.3015469277407176, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 0.386868953704834, |
|
"eval_runtime": 153.0443, |
|
"eval_samples_per_second": 36.957, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.29833149365711636, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.6678842902183533, |
|
"learning_rate": 1.9499999999999995e-06, |
|
"loss": 0.3805, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"eval_loss": 0.3846580684185028, |
|
"eval_runtime": 154.6331, |
|
"eval_samples_per_second": 36.577, |
|
"eval_steps_per_second": 4.572, |
|
"eval_wer": 0.29666778646958136, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"step": 2500, |
|
"total_flos": 9.55129266706546e+18, |
|
"train_loss": 1.1546670959472656, |
|
"train_runtime": 6557.446, |
|
"train_samples_per_second": 12.2, |
|
"train_steps_per_second": 0.381 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 9.55129266706546e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|