XXL-Albert-squadv2 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 21994,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.045466945530599254,
"grad_norm": 39.23893737792969,
"learning_rate": 6.818181818181818e-06,
"loss": 3.9076,
"step": 500
},
{
"epoch": 0.09093389106119851,
"grad_norm": 18.871334075927734,
"learning_rate": 1.3636363636363637e-05,
"loss": 1.415,
"step": 1000
},
{
"epoch": 0.13640083659179777,
"grad_norm": 14.570379257202148,
"learning_rate": 2.0454545454545454e-05,
"loss": 1.2569,
"step": 1500
},
{
"epoch": 0.18186778212239701,
"grad_norm": 16.230432510375977,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.2395,
"step": 2000
},
{
"epoch": 0.22733472765299628,
"grad_norm": 15.570598602294922,
"learning_rate": 2.954531676265535e-05,
"loss": 1.2152,
"step": 2500
},
{
"epoch": 0.27280167318359555,
"grad_norm": 14.000185012817383,
"learning_rate": 2.8787511367080933e-05,
"loss": 1.1669,
"step": 3000
},
{
"epoch": 0.31826861871419476,
"grad_norm": 19.740062713623047,
"learning_rate": 2.802970597150652e-05,
"loss": 1.1665,
"step": 3500
},
{
"epoch": 0.36373556424479403,
"grad_norm": 13.226868629455566,
"learning_rate": 2.7271900575932103e-05,
"loss": 1.1422,
"step": 4000
},
{
"epoch": 0.4092025097753933,
"grad_norm": 8.243535041809082,
"learning_rate": 2.6514095180357682e-05,
"loss": 1.115,
"step": 4500
},
{
"epoch": 0.45466945530599256,
"grad_norm": 23.291852951049805,
"learning_rate": 2.575628978478327e-05,
"loss": 1.1059,
"step": 5000
},
{
"epoch": 0.5001364008365918,
"grad_norm": 14.429057121276855,
"learning_rate": 2.499848438920885e-05,
"loss": 1.1121,
"step": 5500
},
{
"epoch": 0.5456033463671911,
"grad_norm": 9.61043930053711,
"learning_rate": 2.4240678993634438e-05,
"loss": 1.0893,
"step": 6000
},
{
"epoch": 0.5910702918977903,
"grad_norm": 7.90361213684082,
"learning_rate": 2.3482873598060018e-05,
"loss": 1.0813,
"step": 6500
},
{
"epoch": 0.6365372374283895,
"grad_norm": 11.351470947265625,
"learning_rate": 2.27250682024856e-05,
"loss": 1.0667,
"step": 7000
},
{
"epoch": 0.6820041829589888,
"grad_norm": 15.591328620910645,
"learning_rate": 2.1967262806911187e-05,
"loss": 1.0682,
"step": 7500
},
{
"epoch": 0.7274711284895881,
"grad_norm": 9.37597942352295,
"learning_rate": 2.120945741133677e-05,
"loss": 1.0462,
"step": 8000
},
{
"epoch": 0.7729380740201873,
"grad_norm": 17.270828247070312,
"learning_rate": 2.0451652015762353e-05,
"loss": 1.0626,
"step": 8500
},
{
"epoch": 0.8184050195507866,
"grad_norm": 14.909830093383789,
"learning_rate": 1.9693846620187936e-05,
"loss": 1.0214,
"step": 9000
},
{
"epoch": 0.8638719650813859,
"grad_norm": 7.522629261016846,
"learning_rate": 1.893604122461352e-05,
"loss": 1.0459,
"step": 9500
},
{
"epoch": 0.9093389106119851,
"grad_norm": 47.28670883178711,
"learning_rate": 1.8178235829039105e-05,
"loss": 1.0277,
"step": 10000
},
{
"epoch": 0.9548058561425843,
"grad_norm": 14.066143989562988,
"learning_rate": 1.7420430433464688e-05,
"loss": 1.0183,
"step": 10500
},
{
"epoch": 1.0002728016731837,
"grad_norm": 10.882994651794434,
"learning_rate": 1.6662625037890268e-05,
"loss": 0.9878,
"step": 11000
},
{
"epoch": 1.0457397472037828,
"grad_norm": 8.7605562210083,
"learning_rate": 1.5904819642315854e-05,
"loss": 0.8088,
"step": 11500
},
{
"epoch": 1.0912066927343822,
"grad_norm": 5.1129326820373535,
"learning_rate": 1.5147014246741437e-05,
"loss": 0.8296,
"step": 12000
},
{
"epoch": 1.1366736382649814,
"grad_norm": 11.089341163635254,
"learning_rate": 1.438920885116702e-05,
"loss": 0.8104,
"step": 12500
},
{
"epoch": 1.1821405837955807,
"grad_norm": 10.97964096069336,
"learning_rate": 1.3631403455592605e-05,
"loss": 0.8171,
"step": 13000
},
{
"epoch": 1.22760752932618,
"grad_norm": 21.10997200012207,
"learning_rate": 1.2873598060018188e-05,
"loss": 0.8129,
"step": 13500
},
{
"epoch": 1.273074474856779,
"grad_norm": 1.2680716514587402,
"learning_rate": 1.211579266444377e-05,
"loss": 0.7884,
"step": 14000
},
{
"epoch": 1.3185414203873784,
"grad_norm": 11.013956069946289,
"learning_rate": 1.1357987268869355e-05,
"loss": 0.8036,
"step": 14500
},
{
"epoch": 1.3640083659179776,
"grad_norm": 4.082338333129883,
"learning_rate": 1.0600181873294938e-05,
"loss": 0.7772,
"step": 15000
},
{
"epoch": 1.409475311448577,
"grad_norm": 5.8771071434021,
"learning_rate": 9.842376477720523e-06,
"loss": 0.8058,
"step": 15500
},
{
"epoch": 1.4549422569791761,
"grad_norm": 10.076021194458008,
"learning_rate": 9.084571082146104e-06,
"loss": 0.7847,
"step": 16000
},
{
"epoch": 1.5004092025097755,
"grad_norm": 7.645974636077881,
"learning_rate": 8.326765686571689e-06,
"loss": 0.7793,
"step": 16500
},
{
"epoch": 1.5458761480403747,
"grad_norm": 15.3417329788208,
"learning_rate": 7.568960290997272e-06,
"loss": 0.7844,
"step": 17000
},
{
"epoch": 1.5913430935709738,
"grad_norm": 6.472328186035156,
"learning_rate": 6.8111548954228554e-06,
"loss": 0.7767,
"step": 17500
},
{
"epoch": 1.6368100391015732,
"grad_norm": 10.42813777923584,
"learning_rate": 6.053349499848439e-06,
"loss": 0.7792,
"step": 18000
},
{
"epoch": 1.6822769846321726,
"grad_norm": 3.3688242435455322,
"learning_rate": 5.295544104274023e-06,
"loss": 0.7702,
"step": 18500
},
{
"epoch": 1.7277439301627715,
"grad_norm": 5.880104064941406,
"learning_rate": 4.537738708699606e-06,
"loss": 0.8026,
"step": 19000
},
{
"epoch": 1.7732108756933709,
"grad_norm": 20.736509323120117,
"learning_rate": 3.7799333131251894e-06,
"loss": 0.7483,
"step": 19500
},
{
"epoch": 1.8186778212239703,
"grad_norm": 5.447836399078369,
"learning_rate": 3.0221279175507728e-06,
"loss": 0.7301,
"step": 20000
},
{
"epoch": 1.8641447667545694,
"grad_norm": 6.6951141357421875,
"learning_rate": 2.264322521976356e-06,
"loss": 0.7655,
"step": 20500
},
{
"epoch": 1.9096117122851686,
"grad_norm": 5.314289093017578,
"learning_rate": 1.50651712640194e-06,
"loss": 0.7631,
"step": 21000
},
{
"epoch": 1.955078657815768,
"grad_norm": 9.385791778564453,
"learning_rate": 7.487117308275235e-07,
"loss": 0.744,
"step": 21500
},
{
"epoch": 2.0,
"step": 21994,
"total_flos": 1.2277795031512474e+17,
"train_loss": 1.0133790219709247,
"train_runtime": 87315.1804,
"train_samples_per_second": 3.023,
"train_steps_per_second": 0.252
}
],
"logging_steps": 500,
"max_steps": 21994,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2277795031512474e+17,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}
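
For reference, the log_history above can be inspected programmatically. A minimal sketch, assuming the file has been saved locally as trainer_state.json (the path and the printed summary format are illustrative choices, not part of the original checkpoint):

import json

# Load the trainer state written by the Hugging Face Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each logging entry (every 500 steps here) records loss, learning rate,
# and gradient norm; the final entry carries the end-of-training summary.
for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']:>6}  loss {entry['loss']:.4f}  "
              f"lr {entry['learning_rate']:.2e}  grad_norm {entry['grad_norm']:.2f}")
    else:
        print(f"training finished at step {entry['step']} "
              f"(train_loss {entry['train_loss']:.4f}, "
              f"runtime {entry['train_runtime']:.0f}s)")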