|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.536067892503536, |
|
"eval_steps": 100, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14144271570014144, |
|
"eval_loss": 4.031618595123291, |
|
"eval_runtime": 161.4756, |
|
"eval_samples_per_second": 35.027, |
|
"eval_steps_per_second": 4.378, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2828854314002829, |
|
"eval_loss": 3.0548782348632812, |
|
"eval_runtime": 158.3539, |
|
"eval_samples_per_second": 35.717, |
|
"eval_steps_per_second": 4.465, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 2.5533947944641113, |
|
"eval_runtime": 158.9465, |
|
"eval_samples_per_second": 35.584, |
|
"eval_steps_per_second": 4.448, |
|
"eval_wer": 0.9862296976311228, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"eval_loss": 1.4279608726501465, |
|
"eval_runtime": 159.2587, |
|
"eval_samples_per_second": 35.515, |
|
"eval_steps_per_second": 4.439, |
|
"eval_wer": 0.8846536560313283, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 1.9099421501159668, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.6818, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"eval_loss": 1.1378353834152222, |
|
"eval_runtime": 160.4319, |
|
"eval_samples_per_second": 35.255, |
|
"eval_steps_per_second": 4.407, |
|
"eval_wer": 0.7958368106824164, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 0.9263126254081726, |
|
"eval_runtime": 159.8222, |
|
"eval_samples_per_second": 35.389, |
|
"eval_steps_per_second": 4.424, |
|
"eval_wer": 0.6777299865185851, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"eval_loss": 0.8501134514808655, |
|
"eval_runtime": 160.338, |
|
"eval_samples_per_second": 35.275, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.6388104256275278, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"eval_loss": 0.6984566450119019, |
|
"eval_runtime": 159.8325, |
|
"eval_samples_per_second": 35.387, |
|
"eval_steps_per_second": 4.423, |
|
"eval_wer": 0.5563972523592476, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 0.6664860248565674, |
|
"eval_runtime": 162.9316, |
|
"eval_samples_per_second": 34.714, |
|
"eval_steps_per_second": 4.339, |
|
"eval_wer": 0.5401393079540348, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 0.8423302173614502, |
|
"learning_rate": 0.00022574999999999996, |
|
"loss": 0.895, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"eval_loss": 0.6228350400924683, |
|
"eval_runtime": 161.5875, |
|
"eval_samples_per_second": 35.003, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.5000802465173011, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5558698727015559, |
|
"eval_loss": 0.5974757075309753, |
|
"eval_runtime": 160.961, |
|
"eval_samples_per_second": 35.139, |
|
"eval_steps_per_second": 4.392, |
|
"eval_wer": 0.48359761186364514, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 0.5826218128204346, |
|
"eval_runtime": 162.2219, |
|
"eval_samples_per_second": 34.866, |
|
"eval_steps_per_second": 4.358, |
|
"eval_wer": 0.4677248507414778, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8387553041018387, |
|
"eval_loss": 0.5473496317863464, |
|
"eval_runtime": 164.7294, |
|
"eval_samples_per_second": 34.335, |
|
"eval_steps_per_second": 4.292, |
|
"eval_wer": 0.4512903639982025, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"eval_loss": 0.5284178256988525, |
|
"eval_runtime": 168.0547, |
|
"eval_samples_per_second": 33.656, |
|
"eval_steps_per_second": 4.207, |
|
"eval_wer": 0.4376966039673878, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 1.0099021196365356, |
|
"learning_rate": 0.0001512, |
|
"loss": 0.687, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 0.5137470960617065, |
|
"eval_runtime": 160.3266, |
|
"eval_samples_per_second": 35.278, |
|
"eval_steps_per_second": 4.41, |
|
"eval_wer": 0.42363741413622646, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"eval_loss": 0.5110819339752197, |
|
"eval_runtime": 161.254, |
|
"eval_samples_per_second": 35.075, |
|
"eval_steps_per_second": 4.384, |
|
"eval_wer": 0.4103004429607755, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4045261669024045, |
|
"eval_loss": 0.49603915214538574, |
|
"eval_runtime": 160.5657, |
|
"eval_samples_per_second": 35.225, |
|
"eval_steps_per_second": 4.403, |
|
"eval_wer": 0.4084226744559286, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 0.4876905679702759, |
|
"eval_runtime": 161.8676, |
|
"eval_samples_per_second": 34.942, |
|
"eval_steps_per_second": 4.368, |
|
"eval_wer": 0.40148937536110935, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6874115983026874, |
|
"eval_loss": 0.47859108448028564, |
|
"eval_runtime": 161.5548, |
|
"eval_samples_per_second": 35.01, |
|
"eval_steps_per_second": 4.376, |
|
"eval_wer": 0.39929062078705785, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 0.5755736231803894, |
|
"learning_rate": 7.664999999999999e-05, |
|
"loss": 0.5319, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"eval_loss": 0.47312092781066895, |
|
"eval_runtime": 162.5051, |
|
"eval_samples_per_second": 34.805, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.39299929383064774, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 0.4668172001838684, |
|
"eval_runtime": 162.21, |
|
"eval_samples_per_second": 34.868, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.3877190729922321, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1117397454031117, |
|
"eval_loss": 0.46727854013442993, |
|
"eval_runtime": 161.4684, |
|
"eval_samples_per_second": 35.029, |
|
"eval_steps_per_second": 4.379, |
|
"eval_wer": 0.3849585927970726, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.253182461103253, |
|
"eval_loss": 0.46300554275512695, |
|
"eval_runtime": 161.1936, |
|
"eval_samples_per_second": 35.088, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.3804326892212878, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 0.4593857228755951, |
|
"eval_runtime": 161.5089, |
|
"eval_samples_per_second": 35.02, |
|
"eval_steps_per_second": 4.377, |
|
"eval_wer": 0.3768697438531168, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.887208104133606, |
|
"learning_rate": 1.9499999999999995e-06, |
|
"loss": 0.4355, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"eval_loss": 0.4583967626094818, |
|
"eval_runtime": 162.1219, |
|
"eval_samples_per_second": 34.887, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.37831418116453747, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"step": 2500, |
|
"total_flos": 9.55169606524761e+18, |
|
"train_loss": 1.2462444946289062, |
|
"train_runtime": 6962.0027, |
|
"train_samples_per_second": 11.491, |
|
"train_steps_per_second": 0.359 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 9.55169606524761e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|