|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.7084282460136673, |
|
"eval_steps": 200, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11389521640091116, |
|
"eval_loss": 0.7147656679153442, |
|
"eval_runtime": 189.2611, |
|
"eval_samples_per_second": 37.102, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.46031703849373495, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22779043280182232, |
|
"eval_loss": 0.6810471415519714, |
|
"eval_runtime": 189.9773, |
|
"eval_samples_per_second": 36.962, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.4794509626755604, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2847380410022779, |
|
"grad_norm": 3.062281370162964, |
|
"learning_rate": 0.0002781111111111111, |
|
"loss": 1.4735, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3416856492027335, |
|
"eval_loss": 0.6035017371177673, |
|
"eval_runtime": 190.2908, |
|
"eval_samples_per_second": 36.901, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.4485975099323669, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45558086560364464, |
|
"eval_loss": 0.6222513914108276, |
|
"eval_runtime": 191.1122, |
|
"eval_samples_per_second": 36.743, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.5104638647869357, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"grad_norm": 3.101222515106201, |
|
"learning_rate": 0.00022266666666666664, |
|
"loss": 0.7681, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"eval_loss": 0.5656484365463257, |
|
"eval_runtime": 192.373, |
|
"eval_samples_per_second": 36.502, |
|
"eval_steps_per_second": 0.572, |
|
"eval_wer": 0.4336225567706188, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.683371298405467, |
|
"eval_loss": 0.5275253057479858, |
|
"eval_runtime": 193.09, |
|
"eval_samples_per_second": 36.366, |
|
"eval_steps_per_second": 0.57, |
|
"eval_wer": 0.4008291367145458, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7972665148063781, |
|
"eval_loss": 0.5284231305122375, |
|
"eval_runtime": 193.9644, |
|
"eval_samples_per_second": 36.203, |
|
"eval_steps_per_second": 0.567, |
|
"eval_wer": 0.40279567892212226, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8542141230068337, |
|
"grad_norm": 6.068333148956299, |
|
"learning_rate": 0.00016744444444444443, |
|
"loss": 0.7159, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9111617312072893, |
|
"eval_loss": 0.4989575147628784, |
|
"eval_runtime": 194.0102, |
|
"eval_samples_per_second": 36.194, |
|
"eval_steps_per_second": 0.567, |
|
"eval_wer": 0.39135518675507247, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0250569476082005, |
|
"eval_loss": 0.4855109453201294, |
|
"eval_runtime": 193.5264, |
|
"eval_samples_per_second": 36.284, |
|
"eval_steps_per_second": 0.568, |
|
"eval_wer": 0.37300522196680796, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"grad_norm": 2.826504707336426, |
|
"learning_rate": 0.0001121111111111111, |
|
"loss": 0.6203, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"eval_loss": 0.47395312786102295, |
|
"eval_runtime": 193.5198, |
|
"eval_samples_per_second": 36.286, |
|
"eval_steps_per_second": 0.568, |
|
"eval_wer": 0.3622822519565766, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2528473804100229, |
|
"eval_loss": 0.45885559916496277, |
|
"eval_runtime": 194.4002, |
|
"eval_samples_per_second": 36.121, |
|
"eval_steps_per_second": 0.566, |
|
"eval_wer": 0.3536454111800582, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.366742596810934, |
|
"eval_loss": 0.4538777470588684, |
|
"eval_runtime": 194.3098, |
|
"eval_samples_per_second": 36.138, |
|
"eval_steps_per_second": 0.566, |
|
"eval_wer": 0.34074329980467455, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4236902050113895, |
|
"grad_norm": 1.8585691452026367, |
|
"learning_rate": 5.666666666666666e-05, |
|
"loss": 0.5447, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4806378132118452, |
|
"eval_loss": 0.4409582316875458, |
|
"eval_runtime": 193.3591, |
|
"eval_samples_per_second": 36.316, |
|
"eval_steps_per_second": 0.569, |
|
"eval_wer": 0.3357339321542938, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5945330296127562, |
|
"eval_loss": 0.43471136689186096, |
|
"eval_runtime": 193.3767, |
|
"eval_samples_per_second": 36.313, |
|
"eval_steps_per_second": 0.569, |
|
"eval_wer": 0.3293426699796702, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"grad_norm": 1.961743712425232, |
|
"learning_rate": 1.222222222222222e-06, |
|
"loss": 0.5392, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"eval_loss": 0.4313787817955017, |
|
"eval_runtime": 192.9526, |
|
"eval_samples_per_second": 36.392, |
|
"eval_steps_per_second": 0.57, |
|
"eval_wer": 0.3285321356914123, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"step": 3000, |
|
"total_flos": 5.591115044514249e+18, |
|
"train_loss": 0.7769642333984375, |
|
"train_runtime": 4540.3637, |
|
"train_samples_per_second": 10.572, |
|
"train_steps_per_second": 0.661 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.591115044514249e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|