{
  "best_metric": 33.39944765252322,
  "best_model_checkpoint": "/kaggle/working/whisper-small/checkpoint-1000",
  "epoch": 1.3258278145695364,
  "eval_steps": 500,
  "global_step": 1001,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.033112582781456956,
      "grad_norm": 40.76376724243164,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 4.304,
      "step": 25
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 34.9359245300293,
      "learning_rate": 9.000000000000001e-07,
      "loss": 3.9709,
      "step": 50
    },
    {
      "epoch": 0.09933774834437085,
      "grad_norm": 18.766077041625977,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 3.4858,
      "step": 75
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 16.810834884643555,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 3.2396,
      "step": 100
    },
    {
      "epoch": 0.16556291390728478,
      "grad_norm": 19.717323303222656,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 3.1781,
      "step": 125
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 17.67849349975586,
      "learning_rate": 2.9e-06,
      "loss": 3.2006,
      "step": 150
    },
    {
      "epoch": 0.23178807947019867,
      "grad_norm": 16.326919555664062,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 3.146,
      "step": 175
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 17.752124786376953,
      "learning_rate": 3.900000000000001e-06,
      "loss": 3.1915,
      "step": 200
    },
    {
      "epoch": 0.2980132450331126,
      "grad_norm": 17.477537155151367,
      "learning_rate": 4.4e-06,
      "loss": 3.0694,
      "step": 225
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 17.810102462768555,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 3.0796,
      "step": 250
    },
    {
      "epoch": 0.36423841059602646,
      "grad_norm": 18.251075744628906,
      "learning_rate": 5.400000000000001e-06,
      "loss": 3.0895,
      "step": 275
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 17.90201187133789,
      "learning_rate": 5.9e-06,
      "loss": 2.9872,
      "step": 300
    },
    {
      "epoch": 0.4304635761589404,
      "grad_norm": 18.006668090820312,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 3.0425,
      "step": 325
    },
    {
      "epoch": 0.46357615894039733,
      "grad_norm": 16.625247955322266,
      "learning_rate": 6.9e-06,
      "loss": 2.9893,
      "step": 350
    },
    {
      "epoch": 0.4966887417218543,
      "grad_norm": 19.76040267944336,
      "learning_rate": 7.4e-06,
      "loss": 2.9231,
      "step": 375
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 21.64992904663086,
      "learning_rate": 7.9e-06,
      "loss": 3.0319,
      "step": 400
    },
    {
      "epoch": 0.5629139072847682,
      "grad_norm": 18.87610626220703,
      "learning_rate": 8.400000000000001e-06,
      "loss": 2.9039,
      "step": 425
    },
    {
      "epoch": 0.5960264900662252,
      "grad_norm": 18.467191696166992,
      "learning_rate": 8.900000000000001e-06,
      "loss": 2.9607,
      "step": 450
    },
    {
      "epoch": 0.6291390728476821,
      "grad_norm": 17.798429489135742,
      "learning_rate": 9.4e-06,
      "loss": 2.9454,
      "step": 475
    },
    {
      "epoch": 0.6622516556291391,
      "grad_norm": 18.094806671142578,
      "learning_rate": 9.9e-06,
      "loss": 2.9388,
      "step": 500
    },
    {
      "epoch": 0.6622516556291391,
      "eval_cer": 102.70901330655285,
      "eval_loss": 2.9817957878112793,
      "eval_runtime": 2198.2774,
      "eval_samples_per_second": 2.264,
      "eval_steps_per_second": 0.283,
      "step": 500
    },
    {
      "epoch": 0.695364238410596,
      "grad_norm": 16.370372772216797,
      "learning_rate": 9.600000000000001e-06,
      "loss": 2.9295,
      "step": 525
    },
    {
      "epoch": 0.7284768211920529,
      "grad_norm": 18.59728240966797,
      "learning_rate": 9.100000000000001e-06,
      "loss": 2.9214,
      "step": 550
    },
    {
      "epoch": 0.7615894039735099,
      "grad_norm": 16.144062042236328,
      "learning_rate": 8.6e-06,
      "loss": 2.9778,
      "step": 575
    },
    {
      "epoch": 0.7947019867549668,
      "grad_norm": 17.924297332763672,
      "learning_rate": 8.1e-06,
      "loss": 2.8858,
      "step": 600
    },
    {
      "epoch": 0.8278145695364238,
      "grad_norm": 17.39291000366211,
      "learning_rate": 7.600000000000001e-06,
      "loss": 2.8875,
      "step": 625
    },
    {
      "epoch": 0.8609271523178808,
      "grad_norm": 17.19579315185547,
      "learning_rate": 7.100000000000001e-06,
      "loss": 2.9667,
      "step": 650
    },
    {
      "epoch": 0.8940397350993378,
      "grad_norm": 17.705387115478516,
      "learning_rate": 6.600000000000001e-06,
      "loss": 2.9193,
      "step": 675
    },
    {
      "epoch": 0.9271523178807947,
      "grad_norm": 18.197153091430664,
      "learning_rate": 6.1e-06,
      "loss": 2.8616,
      "step": 700
    },
    {
      "epoch": 0.9602649006622517,
      "grad_norm": 16.632579803466797,
      "learning_rate": 5.600000000000001e-06,
      "loss": 2.7889,
      "step": 725
    },
    {
      "epoch": 0.9933774834437086,
      "grad_norm": 23.52530860900879,
      "learning_rate": 5.1e-06,
      "loss": 2.4231,
      "step": 750
    },
    {
      "epoch": 1.0264900662251655,
      "grad_norm": 18.322874069213867,
      "learning_rate": 4.600000000000001e-06,
      "loss": 1.1389,
      "step": 775
    },
    {
      "epoch": 1.0596026490066226,
      "grad_norm": 17.467435836791992,
      "learning_rate": 4.1e-06,
      "loss": 0.8244,
      "step": 800
    },
    {
      "epoch": 1.0927152317880795,
      "grad_norm": 23.982784271240234,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.6974,
      "step": 825
    },
    {
      "epoch": 1.1258278145695364,
      "grad_norm": 15.57445240020752,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.6625,
      "step": 850
    },
    {
      "epoch": 1.1589403973509933,
      "grad_norm": 16.174701690673828,
      "learning_rate": 2.6e-06,
      "loss": 0.6145,
      "step": 875
    },
    {
      "epoch": 1.1920529801324504,
      "grad_norm": 17.91811752319336,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.5941,
      "step": 900
    },
    {
      "epoch": 1.2251655629139073,
      "grad_norm": 16.29407501220703,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.6053,
      "step": 925
    },
    {
      "epoch": 1.2582781456953642,
      "grad_norm": 13.573580741882324,
      "learning_rate": 1.1e-06,
      "loss": 0.5533,
      "step": 950
    },
    {
      "epoch": 1.2913907284768211,
      "grad_norm": 14.268310546875,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.5446,
      "step": 975
    },
    {
      "epoch": 1.3245033112582782,
      "grad_norm": 13.822824478149414,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 0.5492,
      "step": 1000
    },
    {
      "epoch": 1.3245033112582782,
      "eval_cer": 33.39944765252322,
      "eval_loss": 0.6020073294639587,
      "eval_runtime": 2262.2462,
      "eval_samples_per_second": 2.2,
      "eval_steps_per_second": 0.275,
      "step": 1000
    },
    {
      "epoch": 1.3258278145695364,
      "step": 1001,
      "total_flos": 4.66193748381696e+18,
      "train_loss": 0.0006697479780618246,
      "train_runtime": 18.134,
      "train_samples_per_second": 882.321,
      "train_steps_per_second": 55.145
    }
  ],
  "logging_steps": 25,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.66193748381696e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}