|
{ |
|
"best_metric": 0.2754187285900116, |
|
"best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000", |
|
"epoch": 4.524886877828054, |
|
"eval_steps": 500, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.8404507637023926, |
|
"learning_rate": 0.000147375, |
|
"loss": 6.0574, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.0296876430511475, |
|
"eval_runtime": 1138.2592, |
|
"eval_samples_per_second": 23.042, |
|
"eval_steps_per_second": 0.72, |
|
"eval_wer": 0.9990915399659327, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.7007296085357666, |
|
"learning_rate": 0.00029737499999999995, |
|
"loss": 1.224, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.5368403792381287, |
|
"eval_runtime": 1086.8898, |
|
"eval_samples_per_second": 24.131, |
|
"eval_steps_per_second": 0.754, |
|
"eval_wer": 0.4342018379492356, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.096679449081421, |
|
"learning_rate": 0.0002856219512195122, |
|
"loss": 0.434, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.48607054352760315, |
|
"eval_runtime": 1099.4965, |
|
"eval_samples_per_second": 23.855, |
|
"eval_steps_per_second": 0.746, |
|
"eval_wer": 0.3890522154227914, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.0235676765441895, |
|
"learning_rate": 0.00027098780487804874, |
|
"loss": 0.3643, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.7186101078987122, |
|
"learning_rate": 0.00025635365853658536, |
|
"loss": 0.3295, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.4301180839538574, |
|
"eval_runtime": 1082.408, |
|
"eval_samples_per_second": 24.231, |
|
"eval_steps_per_second": 0.758, |
|
"eval_wer": 0.34114777195970813, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.1324162483215332, |
|
"learning_rate": 0.00024171951219512195, |
|
"loss": 0.2739, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.381789892911911, |
|
"eval_runtime": 1080.4732, |
|
"eval_samples_per_second": 24.275, |
|
"eval_steps_per_second": 0.759, |
|
"eval_wer": 0.30533089394990853, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.6504969596862793, |
|
"learning_rate": 0.0002270853658536585, |
|
"loss": 0.2619, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.38941365480422974, |
|
"eval_runtime": 1085.3029, |
|
"eval_samples_per_second": 24.167, |
|
"eval_steps_per_second": 0.756, |
|
"eval_wer": 0.30601644480895, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.8715857863426208, |
|
"learning_rate": 0.0002124512195121951, |
|
"loss": 0.2517, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.3497116267681122, |
|
"eval_runtime": 1075.6705, |
|
"eval_samples_per_second": 24.383, |
|
"eval_steps_per_second": 0.762, |
|
"eval_wer": 0.2801926271738902, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.5574731826782227, |
|
"learning_rate": 0.0001978170731707317, |
|
"loss": 0.2346, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 1.0655726194381714, |
|
"learning_rate": 0.00018318292682926828, |
|
"loss": 0.2244, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.35193705558776855, |
|
"eval_runtime": 1078.0396, |
|
"eval_samples_per_second": 24.329, |
|
"eval_steps_per_second": 0.761, |
|
"eval_wer": 0.2792000504700019, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.5482127070426941, |
|
"learning_rate": 0.0001685487804878049, |
|
"loss": 0.1854, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 0.33764052391052246, |
|
"eval_runtime": 1076.667, |
|
"eval_samples_per_second": 24.36, |
|
"eval_steps_per_second": 0.762, |
|
"eval_wer": 0.2718104010262234, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.6764945387840271, |
|
"learning_rate": 0.00015391463414634145, |
|
"loss": 0.1779, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.32059213519096375, |
|
"eval_runtime": 1088.6325, |
|
"eval_samples_per_second": 24.093, |
|
"eval_steps_per_second": 0.753, |
|
"eval_wer": 0.25195466111496645, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 1.0232322216033936, |
|
"learning_rate": 0.00013928048780487804, |
|
"loss": 0.1749, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.31690067052841187, |
|
"eval_runtime": 1101.7225, |
|
"eval_samples_per_second": 23.806, |
|
"eval_steps_per_second": 0.744, |
|
"eval_wer": 0.2534687611717452, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.6185225248336792, |
|
"learning_rate": 0.00012464634146341463, |
|
"loss": 0.172, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.9978949427604675, |
|
"learning_rate": 0.0001100121951219512, |
|
"loss": 0.1636, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 0.3122297525405884, |
|
"eval_runtime": 1102.4428, |
|
"eval_samples_per_second": 23.791, |
|
"eval_steps_per_second": 0.744, |
|
"eval_wer": 0.24648707757659874, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 0.9038313031196594, |
|
"learning_rate": 9.53780487804878e-05, |
|
"loss": 0.137, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 0.30537155270576477, |
|
"eval_runtime": 1108.9259, |
|
"eval_samples_per_second": 23.652, |
|
"eval_steps_per_second": 0.739, |
|
"eval_wer": 0.23823523226715454, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"grad_norm": 0.9038735032081604, |
|
"learning_rate": 8.074390243902438e-05, |
|
"loss": 0.1311, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 0.29557299613952637, |
|
"eval_runtime": 1090.6163, |
|
"eval_samples_per_second": 24.049, |
|
"eval_steps_per_second": 0.752, |
|
"eval_wer": 0.22798141021596954, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"grad_norm": 0.523389995098114, |
|
"learning_rate": 6.610975609756097e-05, |
|
"loss": 0.1261, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 0.28975382447242737, |
|
"eval_runtime": 1108.1062, |
|
"eval_samples_per_second": 23.669, |
|
"eval_steps_per_second": 0.74, |
|
"eval_wer": 0.22361575505225748, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"grad_norm": 0.8056386709213257, |
|
"learning_rate": 5.147560975609756e-05, |
|
"loss": 0.1242, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 1.5024261474609375, |
|
"learning_rate": 3.684146341463414e-05, |
|
"loss": 0.1187, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 0.28465163707733154, |
|
"eval_runtime": 1092.1505, |
|
"eval_samples_per_second": 24.015, |
|
"eval_steps_per_second": 0.751, |
|
"eval_wer": 0.21763085399449036, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"grad_norm": 1.5841491222381592, |
|
"learning_rate": 2.2207317073170727e-05, |
|
"loss": 0.1011, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_loss": 0.2762569189071655, |
|
"eval_runtime": 1102.2478, |
|
"eval_samples_per_second": 23.795, |
|
"eval_steps_per_second": 0.744, |
|
"eval_wer": 0.21240720879860367, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"grad_norm": 0.9246074557304382, |
|
"learning_rate": 7.573170731707317e-06, |
|
"loss": 0.0981, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_loss": 0.2754187285900116, |
|
"eval_runtime": 1092.5524, |
|
"eval_samples_per_second": 24.006, |
|
"eval_steps_per_second": 0.751, |
|
"eval_wer": 0.2115155720985006, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"step": 9000, |
|
"total_flos": 8.933861078537978e+19, |
|
"train_loss": 0.5072881503634983, |
|
"train_runtime": 52839.7574, |
|
"train_samples_per_second": 10.901, |
|
"train_steps_per_second": 0.17 |
|
} |
|
], |
|
"logging_steps": 400, |
|
"max_steps": 9000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 8.933861078537978e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|