xls-r-1b-bem-natbed-native-model / trainer_state.json
csikasote's picture
End of training
7110be8 verified
{
"best_metric": 0.6841106414794922,
"best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-natbed-native-model/checkpoint-1000",
"epoch": 7.303370786516854,
"eval_steps": 100,
"global_step": 1300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5617977528089888,
"grad_norm": 4.858776092529297,
"learning_rate": 0.00029099999999999997,
"loss": 4.5137,
"step": 100
},
{
"epoch": 0.5617977528089888,
"eval_loss": 2.554856777191162,
"eval_runtime": 50.0091,
"eval_samples_per_second": 12.998,
"eval_steps_per_second": 1.64,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 1.1235955056179776,
"grad_norm": 3.9340174198150635,
"learning_rate": 0.00029444656488549615,
"loss": 1.3916,
"step": 200
},
{
"epoch": 1.1235955056179776,
"eval_loss": 1.088287115097046,
"eval_runtime": 49.3058,
"eval_samples_per_second": 13.183,
"eval_steps_per_second": 1.663,
"eval_wer": 0.9840240430243594,
"step": 200
},
{
"epoch": 1.6853932584269664,
"grad_norm": 1.637488603591919,
"learning_rate": 0.00028872137404580147,
"loss": 0.9962,
"step": 300
},
{
"epoch": 1.6853932584269664,
"eval_loss": 0.8152701258659363,
"eval_runtime": 50.2981,
"eval_samples_per_second": 12.923,
"eval_steps_per_second": 1.63,
"eval_wer": 0.8190446061372983,
"step": 300
},
{
"epoch": 2.247191011235955,
"grad_norm": 1.2791478633880615,
"learning_rate": 0.00028299618320610685,
"loss": 0.8625,
"step": 400
},
{
"epoch": 2.247191011235955,
"eval_loss": 0.8690391182899475,
"eval_runtime": 49.7576,
"eval_samples_per_second": 13.063,
"eval_steps_per_second": 1.648,
"eval_wer": 0.8418222081619741,
"step": 400
},
{
"epoch": 2.808988764044944,
"grad_norm": 1.049700379371643,
"learning_rate": 0.00027727099236641217,
"loss": 0.8168,
"step": 500
},
{
"epoch": 2.808988764044944,
"eval_loss": 0.7395117282867432,
"eval_runtime": 49.4925,
"eval_samples_per_second": 13.133,
"eval_steps_per_second": 1.657,
"eval_wer": 0.7390066434672572,
"step": 500
},
{
"epoch": 3.370786516853933,
"grad_norm": 2.349531650543213,
"learning_rate": 0.00027154580152671755,
"loss": 0.7197,
"step": 600
},
{
"epoch": 3.370786516853933,
"eval_loss": 0.7596462965011597,
"eval_runtime": 49.3475,
"eval_samples_per_second": 13.172,
"eval_steps_per_second": 1.662,
"eval_wer": 0.7366339765896868,
"step": 600
},
{
"epoch": 3.932584269662921,
"grad_norm": 0.8924151062965393,
"learning_rate": 0.00026582061068702287,
"loss": 0.6848,
"step": 700
},
{
"epoch": 3.932584269662921,
"eval_loss": 0.7033310532569885,
"eval_runtime": 50.4297,
"eval_samples_per_second": 12.889,
"eval_steps_per_second": 1.626,
"eval_wer": 0.7228725086997786,
"step": 700
},
{
"epoch": 4.49438202247191,
"grad_norm": 1.047180414199829,
"learning_rate": 0.0002600954198473282,
"loss": 0.6134,
"step": 800
},
{
"epoch": 4.49438202247191,
"eval_loss": 0.830004870891571,
"eval_runtime": 50.1876,
"eval_samples_per_second": 12.951,
"eval_steps_per_second": 1.634,
"eval_wer": 0.7662132236633976,
"step": 800
},
{
"epoch": 5.056179775280899,
"grad_norm": 0.8339403867721558,
"learning_rate": 0.00025437022900763357,
"loss": 0.6303,
"step": 900
},
{
"epoch": 5.056179775280899,
"eval_loss": 0.73649001121521,
"eval_runtime": 49.3956,
"eval_samples_per_second": 13.159,
"eval_steps_per_second": 1.66,
"eval_wer": 0.7896235368554255,
"step": 900
},
{
"epoch": 5.617977528089888,
"grad_norm": 0.746986448764801,
"learning_rate": 0.0002486450381679389,
"loss": 0.5467,
"step": 1000
},
{
"epoch": 5.617977528089888,
"eval_loss": 0.6841106414794922,
"eval_runtime": 49.7948,
"eval_samples_per_second": 13.054,
"eval_steps_per_second": 1.647,
"eval_wer": 0.7486554887693768,
"step": 1000
},
{
"epoch": 6.179775280898877,
"grad_norm": 0.8229517936706543,
"learning_rate": 0.00024291984732824427,
"loss": 0.5194,
"step": 1100
},
{
"epoch": 6.179775280898877,
"eval_loss": 0.7867633700370789,
"eval_runtime": 49.8846,
"eval_samples_per_second": 13.03,
"eval_steps_per_second": 1.644,
"eval_wer": 0.694875039544448,
"step": 1100
},
{
"epoch": 6.741573033707866,
"grad_norm": 0.969409704208374,
"learning_rate": 0.0002371946564885496,
"loss": 0.4617,
"step": 1200
},
{
"epoch": 6.741573033707866,
"eval_loss": 0.7563472986221313,
"eval_runtime": 49.4271,
"eval_samples_per_second": 13.151,
"eval_steps_per_second": 1.659,
"eval_wer": 0.7277760202467574,
"step": 1200
},
{
"epoch": 7.303370786516854,
"grad_norm": 0.5057498812675476,
"learning_rate": 0.00023146946564885494,
"loss": 0.4525,
"step": 1300
},
{
"epoch": 7.303370786516854,
"eval_loss": 0.7276196479797363,
"eval_runtime": 49.3168,
"eval_samples_per_second": 13.18,
"eval_steps_per_second": 1.663,
"eval_wer": 0.6730465042708004,
"step": 1300
},
{
"epoch": 7.303370786516854,
"step": 1300,
"total_flos": 1.4772414251163556e+19,
"train_loss": 1.0161105111929087,
"train_runtime": 3259.7695,
"train_samples_per_second": 26.183,
"train_steps_per_second": 1.638
}
],
"logging_steps": 100,
"max_steps": 5340,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4772414251163556e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}