{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0289389067524115,
  "eval_steps": 500,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06430868167202572,
      "grad_norm": 6.638989448547363,
      "learning_rate": 0.00018487499999999998,
      "loss": 4.9036,
      "step": 500
    },
    {
      "epoch": 0.06430868167202572,
      "eval_loss": Infinity,
      "eval_runtime": 109.3576,
      "eval_samples_per_second": 35.672,
      "eval_steps_per_second": 0.558,
      "eval_wer": 0.9911607013476308,
      "step": 500
    },
    {
      "epoch": 0.12861736334405144,
      "grad_norm": 5.176253795623779,
      "learning_rate": 0.000292,
      "loss": 1.3889,
      "step": 1000
    },
    {
      "epoch": 0.12861736334405144,
      "eval_loss": Infinity,
      "eval_runtime": 104.5051,
      "eval_samples_per_second": 37.328,
      "eval_steps_per_second": 0.584,
      "eval_wer": 0.793387431773173,
      "step": 1000
    },
    {
      "epoch": 0.19292604501607716,
      "grad_norm": 8.991605758666992,
      "learning_rate": 0.0002711666666666666,
      "loss": 1.0207,
      "step": 1500
    },
    {
      "epoch": 0.19292604501607716,
      "eval_loss": Infinity,
      "eval_runtime": 104.6871,
      "eval_samples_per_second": 37.263,
      "eval_steps_per_second": 0.583,
      "eval_wer": 0.6828961986185577,
      "step": 1500
    },
    {
      "epoch": 0.2572347266881029,
      "grad_norm": 5.992569446563721,
      "learning_rate": 0.000250375,
      "loss": 0.8901,
      "step": 2000
    },
    {
      "epoch": 0.2572347266881029,
      "eval_loss": Infinity,
      "eval_runtime": 104.7623,
      "eval_samples_per_second": 37.237,
      "eval_steps_per_second": 0.582,
      "eval_wer": 0.6450031396415978,
      "step": 2000
    },
    {
      "epoch": 0.3215434083601286,
      "grad_norm": 5.538565635681152,
      "learning_rate": 0.00022954166666666666,
      "loss": 0.8467,
      "step": 2500
    },
    {
      "epoch": 0.3215434083601286,
      "eval_loss": Infinity,
      "eval_runtime": 105.5938,
      "eval_samples_per_second": 36.943,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.5457904651499783,
      "step": 2500
    },
    {
      "epoch": 0.3858520900321543,
      "grad_norm": 10.923360824584961,
      "learning_rate": 0.00020874999999999998,
      "loss": 0.7813,
      "step": 3000
    },
    {
      "epoch": 0.3858520900321543,
      "eval_loss": Infinity,
      "eval_runtime": 107.2937,
      "eval_samples_per_second": 36.358,
      "eval_steps_per_second": 0.569,
      "eval_wer": 0.5366855045162536,
      "step": 3000
    },
    {
      "epoch": 0.45016077170418006,
      "grad_norm": 8.696784973144531,
      "learning_rate": 0.00018791666666666664,
      "loss": 0.7456,
      "step": 3500
    },
    {
      "epoch": 0.45016077170418006,
      "eval_loss": Infinity,
      "eval_runtime": 105.1616,
      "eval_samples_per_second": 37.095,
      "eval_steps_per_second": 0.58,
      "eval_wer": 0.5156016036323238,
      "step": 3500
    },
    {
      "epoch": 0.5144694533762058,
      "grad_norm": 25.500185012817383,
      "learning_rate": 0.00016708333333333332,
      "loss": 0.7312,
      "step": 4000
    },
    {
      "epoch": 0.5144694533762058,
      "eval_loss": Infinity,
      "eval_runtime": 106.9175,
      "eval_samples_per_second": 36.486,
      "eval_steps_per_second": 0.571,
      "eval_wer": 0.48007535139834806,
      "step": 4000
    },
    {
      "epoch": 0.5787781350482315,
      "grad_norm": 4.240171909332275,
      "learning_rate": 0.00014629166666666667,
      "loss": 0.6929,
      "step": 4500
    },
    {
      "epoch": 0.5787781350482315,
      "eval_loss": Infinity,
      "eval_runtime": 107.3751,
      "eval_samples_per_second": 36.331,
      "eval_steps_per_second": 0.568,
      "eval_wer": 0.4659469642080858,
      "step": 4500
    },
    {
      "epoch": 0.6430868167202572,
      "grad_norm": 7.269958019256592,
      "learning_rate": 0.0001255,
      "loss": 0.6567,
      "step": 5000
    },
    {
      "epoch": 0.6430868167202572,
      "eval_loss": Infinity,
      "eval_runtime": 106.0885,
      "eval_samples_per_second": 36.771,
      "eval_steps_per_second": 0.575,
      "eval_wer": 0.44049171617639954,
      "step": 5000
    },
    {
      "epoch": 0.707395498392283,
      "grad_norm": 9.208508491516113,
      "learning_rate": 0.00010466666666666667,
      "loss": 0.6413,
      "step": 5500
    },
    {
      "epoch": 0.707395498392283,
      "eval_loss": Infinity,
      "eval_runtime": 107.3705,
      "eval_samples_per_second": 36.332,
      "eval_steps_per_second": 0.568,
      "eval_wer": 0.4199391392551804,
      "step": 5500
    },
    {
      "epoch": 0.7717041800643086,
      "grad_norm": 12.485828399658203,
      "learning_rate": 8.387499999999999e-05,
      "loss": 0.6187,
      "step": 6000
    },
    {
      "epoch": 0.7717041800643086,
      "eval_loss": Infinity,
      "eval_runtime": 105.1802,
      "eval_samples_per_second": 37.089,
      "eval_steps_per_second": 0.58,
      "eval_wer": 0.41023040139110273,
      "step": 6000
    },
    {
      "epoch": 0.8360128617363344,
      "grad_norm": 2.41277813911438,
      "learning_rate": 6.304166666666666e-05,
      "loss": 0.6086,
      "step": 6500
    },
    {
      "epoch": 0.8360128617363344,
      "eval_loss": Infinity,
      "eval_runtime": 105.1613,
      "eval_samples_per_second": 37.095,
      "eval_steps_per_second": 0.58,
      "eval_wer": 0.40366130512486115,
      "step": 6500
    },
    {
      "epoch": 0.9003215434083601,
      "grad_norm": 6.724209308624268,
      "learning_rate": 4.2208333333333334e-05,
      "loss": 0.585,
      "step": 7000
    },
    {
      "epoch": 0.9003215434083601,
      "eval_loss": Infinity,
      "eval_runtime": 107.3364,
      "eval_samples_per_second": 36.344,
      "eval_steps_per_second": 0.568,
      "eval_wer": 0.38716611119161476,
      "step": 7000
    },
    {
      "epoch": 0.9646302250803859,
      "grad_norm": 7.223489761352539,
      "learning_rate": 2.1374999999999998e-05,
      "loss": 0.5509,
      "step": 7500
    },
    {
      "epoch": 0.9646302250803859,
      "eval_loss": Infinity,
      "eval_runtime": 107.0445,
      "eval_samples_per_second": 36.443,
      "eval_steps_per_second": 0.57,
      "eval_wer": 0.3831570303820702,
      "step": 7500
    },
    {
      "epoch": 1.0289389067524115,
      "grad_norm": 2.0498945713043213,
      "learning_rate": 5.416666666666666e-07,
      "loss": 0.5263,
      "step": 8000
    },
    {
      "epoch": 1.0289389067524115,
      "eval_loss": Infinity,
      "eval_runtime": 108.0404,
      "eval_samples_per_second": 36.107,
      "eval_steps_per_second": 0.565,
      "eval_wer": 0.37269960875235475,
      "step": 8000
    },
    {
      "epoch": 1.0289389067524115,
      "step": 8000,
      "total_flos": 8.75110352693014e+18,
      "train_loss": 1.0117860870361328,
      "train_runtime": 5013.4693,
      "train_samples_per_second": 12.766,
      "train_steps_per_second": 1.596
    }
  ],
  "logging_steps": 500,
  "max_steps": 8000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.75110352693014e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}