|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.6775106082036775, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14144271570014144, |
|
"eval_loss": 3.626366138458252, |
|
"eval_runtime": 164.728, |
|
"eval_samples_per_second": 34.335, |
|
"eval_steps_per_second": 4.292, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2828854314002829, |
|
"eval_loss": 3.1079769134521484, |
|
"eval_runtime": 161.0986, |
|
"eval_samples_per_second": 35.109, |
|
"eval_steps_per_second": 4.389, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 2.5440056324005127, |
|
"eval_runtime": 161.942, |
|
"eval_samples_per_second": 34.926, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.9862761130617415, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"eval_loss": 1.2381267547607422, |
|
"eval_runtime": 162.1273, |
|
"eval_samples_per_second": 34.886, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.8110079681913649, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 3.555830955505371, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.4586, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"eval_loss": 1.0035638809204102, |
|
"eval_runtime": 161.1941, |
|
"eval_samples_per_second": 35.088, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.7184358616709153, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 0.8524216413497925, |
|
"eval_runtime": 162.0442, |
|
"eval_samples_per_second": 34.904, |
|
"eval_steps_per_second": 4.363, |
|
"eval_wer": 0.6379042213778398, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"eval_loss": 0.7411457300186157, |
|
"eval_runtime": 162.8526, |
|
"eval_samples_per_second": 34.731, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.5673288121462813, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"eval_loss": 0.6202757954597473, |
|
"eval_runtime": 162.5052, |
|
"eval_samples_per_second": 34.805, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.48383114488640916, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 0.6016646027565002, |
|
"eval_runtime": 162.6225, |
|
"eval_samples_per_second": 34.78, |
|
"eval_steps_per_second": 4.347, |
|
"eval_wer": 0.48198739839353566, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 1.029418706893921, |
|
"learning_rate": 0.00022928571428571426, |
|
"loss": 0.8146, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"eval_loss": 0.5637466311454773, |
|
"eval_runtime": 163.5441, |
|
"eval_samples_per_second": 34.584, |
|
"eval_steps_per_second": 4.323, |
|
"eval_wer": 0.4461706186971927, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5558698727015559, |
|
"eval_loss": 0.5380699038505554, |
|
"eval_runtime": 163.2757, |
|
"eval_samples_per_second": 34.641, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 0.4207589822519359, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 0.5155439972877502, |
|
"eval_runtime": 163.2795, |
|
"eval_samples_per_second": 34.64, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 0.4043416221762622, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8387553041018387, |
|
"eval_loss": 0.4857912063598633, |
|
"eval_runtime": 163.399, |
|
"eval_samples_per_second": 34.615, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.39034518140862234, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"eval_loss": 0.47579917311668396, |
|
"eval_runtime": 162.8853, |
|
"eval_samples_per_second": 34.724, |
|
"eval_steps_per_second": 4.34, |
|
"eval_wer": 0.38234492488737115, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 1.043891429901123, |
|
"learning_rate": 0.0001582857142857143, |
|
"loss": 0.6294, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 0.46035128831863403, |
|
"eval_runtime": 163.2756, |
|
"eval_samples_per_second": 34.641, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 0.36421207894441504, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"eval_loss": 0.45305728912353516, |
|
"eval_runtime": 164.2898, |
|
"eval_samples_per_second": 34.427, |
|
"eval_steps_per_second": 4.303, |
|
"eval_wer": 0.35355041444214647, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4045261669024045, |
|
"eval_loss": 0.44349414110183716, |
|
"eval_runtime": 164.2629, |
|
"eval_samples_per_second": 34.433, |
|
"eval_steps_per_second": 4.304, |
|
"eval_wer": 0.3510813973995158, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 0.4366327226161957, |
|
"eval_runtime": 164.0684, |
|
"eval_samples_per_second": 34.473, |
|
"eval_steps_per_second": 4.309, |
|
"eval_wer": 0.3498949866127972, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6874115983026874, |
|
"eval_loss": 0.43093863129615784, |
|
"eval_runtime": 164.1859, |
|
"eval_samples_per_second": 34.449, |
|
"eval_steps_per_second": 4.306, |
|
"eval_wer": 0.34513331088772387, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 0.763507604598999, |
|
"learning_rate": 8.728571428571428e-05, |
|
"loss": 0.4914, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"eval_loss": 0.4252130389213562, |
|
"eval_runtime": 164.7353, |
|
"eval_samples_per_second": 34.334, |
|
"eval_steps_per_second": 4.292, |
|
"eval_wer": 0.3367803376460969, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 0.42176735401153564, |
|
"eval_runtime": 163.889, |
|
"eval_samples_per_second": 34.511, |
|
"eval_steps_per_second": 4.314, |
|
"eval_wer": 0.3285075272954644, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1117397454031117, |
|
"eval_loss": 0.42082250118255615, |
|
"eval_runtime": 165.1043, |
|
"eval_samples_per_second": 34.257, |
|
"eval_steps_per_second": 4.282, |
|
"eval_wer": 0.32510862071729757, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.253182461103253, |
|
"eval_loss": 0.41444018483161926, |
|
"eval_runtime": 164.0969, |
|
"eval_samples_per_second": 34.467, |
|
"eval_steps_per_second": 4.308, |
|
"eval_wer": 0.32363362352299874, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 0.41402822732925415, |
|
"eval_runtime": 164.4996, |
|
"eval_samples_per_second": 34.383, |
|
"eval_steps_per_second": 4.298, |
|
"eval_wer": 0.31654722395908486, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.8644358515739441, |
|
"learning_rate": 1.614285714285714e-05, |
|
"loss": 0.4011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"eval_loss": 0.4133088290691376, |
|
"eval_runtime": 164.6412, |
|
"eval_samples_per_second": 34.353, |
|
"eval_steps_per_second": 4.294, |
|
"eval_wer": 0.31574559504913985, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"eval_loss": 0.4086272120475769, |
|
"eval_runtime": 163.5841, |
|
"eval_samples_per_second": 34.575, |
|
"eval_steps_per_second": 4.322, |
|
"eval_wer": 0.3122344604235807, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"step": 2600, |
|
"total_flos": 9.93185228450562e+18, |
|
"train_loss": 1.1287707856985238, |
|
"train_runtime": 7180.9983, |
|
"train_samples_per_second": 11.586, |
|
"train_steps_per_second": 0.362 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 9.93185228450562e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|