|
{ |
|
"best_metric": 0.6841106414794922, |
|
"best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-natbed-native-model/checkpoint-1000", |
|
"epoch": 7.303370786516854, |
|
"eval_steps": 100, |
|
"global_step": 1300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5617977528089888, |
|
"grad_norm": 4.858776092529297, |
|
"learning_rate": 0.00029099999999999997, |
|
"loss": 4.5137, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5617977528089888, |
|
"eval_loss": 2.554856777191162, |
|
"eval_runtime": 50.0091, |
|
"eval_samples_per_second": 12.998, |
|
"eval_steps_per_second": 1.64, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1235955056179776, |
|
"grad_norm": 3.9340174198150635, |
|
"learning_rate": 0.00029444656488549615, |
|
"loss": 1.3916, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1235955056179776, |
|
"eval_loss": 1.088287115097046, |
|
"eval_runtime": 49.3058, |
|
"eval_samples_per_second": 13.183, |
|
"eval_steps_per_second": 1.663, |
|
"eval_wer": 0.9840240430243594, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6853932584269664, |
|
"grad_norm": 1.637488603591919, |
|
"learning_rate": 0.00028872137404580147, |
|
"loss": 0.9962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6853932584269664, |
|
"eval_loss": 0.8152701258659363, |
|
"eval_runtime": 50.2981, |
|
"eval_samples_per_second": 12.923, |
|
"eval_steps_per_second": 1.63, |
|
"eval_wer": 0.8190446061372983, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.247191011235955, |
|
"grad_norm": 1.2791478633880615, |
|
"learning_rate": 0.00028299618320610685, |
|
"loss": 0.8625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.247191011235955, |
|
"eval_loss": 0.8690391182899475, |
|
"eval_runtime": 49.7576, |
|
"eval_samples_per_second": 13.063, |
|
"eval_steps_per_second": 1.648, |
|
"eval_wer": 0.8418222081619741, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.808988764044944, |
|
"grad_norm": 1.049700379371643, |
|
"learning_rate": 0.00027727099236641217, |
|
"loss": 0.8168, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.808988764044944, |
|
"eval_loss": 0.7395117282867432, |
|
"eval_runtime": 49.4925, |
|
"eval_samples_per_second": 13.133, |
|
"eval_steps_per_second": 1.657, |
|
"eval_wer": 0.7390066434672572, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.370786516853933, |
|
"grad_norm": 2.349531650543213, |
|
"learning_rate": 0.00027154580152671755, |
|
"loss": 0.7197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.370786516853933, |
|
"eval_loss": 0.7596462965011597, |
|
"eval_runtime": 49.3475, |
|
"eval_samples_per_second": 13.172, |
|
"eval_steps_per_second": 1.662, |
|
"eval_wer": 0.7366339765896868, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.932584269662921, |
|
"grad_norm": 0.8924151062965393, |
|
"learning_rate": 0.00026582061068702287, |
|
"loss": 0.6848, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.932584269662921, |
|
"eval_loss": 0.7033310532569885, |
|
"eval_runtime": 50.4297, |
|
"eval_samples_per_second": 12.889, |
|
"eval_steps_per_second": 1.626, |
|
"eval_wer": 0.7228725086997786, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.49438202247191, |
|
"grad_norm": 1.047180414199829, |
|
"learning_rate": 0.0002600954198473282, |
|
"loss": 0.6134, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.49438202247191, |
|
"eval_loss": 0.830004870891571, |
|
"eval_runtime": 50.1876, |
|
"eval_samples_per_second": 12.951, |
|
"eval_steps_per_second": 1.634, |
|
"eval_wer": 0.7662132236633976, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.056179775280899, |
|
"grad_norm": 0.8339403867721558, |
|
"learning_rate": 0.00025437022900763357, |
|
"loss": 0.6303, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.056179775280899, |
|
"eval_loss": 0.73649001121521, |
|
"eval_runtime": 49.3956, |
|
"eval_samples_per_second": 13.159, |
|
"eval_steps_per_second": 1.66, |
|
"eval_wer": 0.7896235368554255, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.617977528089888, |
|
"grad_norm": 0.746986448764801, |
|
"learning_rate": 0.0002486450381679389, |
|
"loss": 0.5467, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.617977528089888, |
|
"eval_loss": 0.6841106414794922, |
|
"eval_runtime": 49.7948, |
|
"eval_samples_per_second": 13.054, |
|
"eval_steps_per_second": 1.647, |
|
"eval_wer": 0.7486554887693768, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.179775280898877, |
|
"grad_norm": 0.8229517936706543, |
|
"learning_rate": 0.00024291984732824427, |
|
"loss": 0.5194, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.179775280898877, |
|
"eval_loss": 0.7867633700370789, |
|
"eval_runtime": 49.8846, |
|
"eval_samples_per_second": 13.03, |
|
"eval_steps_per_second": 1.644, |
|
"eval_wer": 0.694875039544448, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.741573033707866, |
|
"grad_norm": 0.969409704208374, |
|
"learning_rate": 0.0002371946564885496, |
|
"loss": 0.4617, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.741573033707866, |
|
"eval_loss": 0.7563472986221313, |
|
"eval_runtime": 49.4271, |
|
"eval_samples_per_second": 13.151, |
|
"eval_steps_per_second": 1.659, |
|
"eval_wer": 0.7277760202467574, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.303370786516854, |
|
"grad_norm": 0.5057498812675476, |
|
"learning_rate": 0.00023146946564885494, |
|
"loss": 0.4525, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.303370786516854, |
|
"eval_loss": 0.7276196479797363, |
|
"eval_runtime": 49.3168, |
|
"eval_samples_per_second": 13.18, |
|
"eval_steps_per_second": 1.663, |
|
"eval_wer": 0.6730465042708004, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.303370786516854, |
|
"step": 1300, |
|
"total_flos": 1.4772414251163556e+19, |
|
"train_loss": 1.0161105111929087, |
|
"train_runtime": 3259.7695, |
|
"train_samples_per_second": 26.183, |
|
"train_steps_per_second": 1.638 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 2 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4772414251163556e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|