XXL-Albert-squadv2 / trainer_state.json

xxlalbert

8f566e9 verified 4 months ago

8.5 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.0,
	"eval_steps": 500,
	"global_step": 21994,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.045466945530599254,
	"grad_norm": 39.23893737792969,
	"learning_rate": 6.818181818181818e-06,
	"loss": 3.9076,
	"step": 500
	},
	{
	"epoch": 0.09093389106119851,
	"grad_norm": 18.871334075927734,
	"learning_rate": 1.3636363636363637e-05,
	"loss": 1.415,
	"step": 1000
	},
	{
	"epoch": 0.13640083659179777,
	"grad_norm": 14.570379257202148,
	"learning_rate": 2.0454545454545454e-05,
	"loss": 1.2569,
	"step": 1500
	},
	{
	"epoch": 0.18186778212239701,
	"grad_norm": 16.230432510375977,
	"learning_rate": 2.7272727272727273e-05,
	"loss": 1.2395,
	"step": 2000
	},
	{
	"epoch": 0.22733472765299628,
	"grad_norm": 15.570598602294922,
	"learning_rate": 2.954531676265535e-05,
	"loss": 1.2152,
	"step": 2500
	},
	{
	"epoch": 0.27280167318359555,
	"grad_norm": 14.000185012817383,
	"learning_rate": 2.8787511367080933e-05,
	"loss": 1.1669,
	"step": 3000
	},
	{
	"epoch": 0.31826861871419476,
	"grad_norm": 19.740062713623047,
	"learning_rate": 2.802970597150652e-05,
	"loss": 1.1665,
	"step": 3500
	},
	{
	"epoch": 0.36373556424479403,
	"grad_norm": 13.226868629455566,
	"learning_rate": 2.7271900575932103e-05,
	"loss": 1.1422,
	"step": 4000
	},
	{
	"epoch": 0.4092025097753933,
	"grad_norm": 8.243535041809082,
	"learning_rate": 2.6514095180357682e-05,
	"loss": 1.115,
	"step": 4500
	},
	{
	"epoch": 0.45466945530599256,
	"grad_norm": 23.291852951049805,
	"learning_rate": 2.575628978478327e-05,
	"loss": 1.1059,
	"step": 5000
	},
	{
	"epoch": 0.5001364008365918,
	"grad_norm": 14.429057121276855,
	"learning_rate": 2.499848438920885e-05,
	"loss": 1.1121,
	"step": 5500
	},
	{
	"epoch": 0.5456033463671911,
	"grad_norm": 9.61043930053711,
	"learning_rate": 2.4240678993634438e-05,
	"loss": 1.0893,
	"step": 6000
	},
	{
	"epoch": 0.5910702918977903,
	"grad_norm": 7.90361213684082,
	"learning_rate": 2.3482873598060018e-05,
	"loss": 1.0813,
	"step": 6500
	},
	{
	"epoch": 0.6365372374283895,
	"grad_norm": 11.351470947265625,
	"learning_rate": 2.27250682024856e-05,
	"loss": 1.0667,
	"step": 7000
	},
	{
	"epoch": 0.6820041829589888,
	"grad_norm": 15.591328620910645,
	"learning_rate": 2.1967262806911187e-05,
	"loss": 1.0682,
	"step": 7500
	},
	{
	"epoch": 0.7274711284895881,
	"grad_norm": 9.37597942352295,
	"learning_rate": 2.120945741133677e-05,
	"loss": 1.0462,
	"step": 8000
	},
	{
	"epoch": 0.7729380740201873,
	"grad_norm": 17.270828247070312,
	"learning_rate": 2.0451652015762353e-05,
	"loss": 1.0626,
	"step": 8500
	},
	{
	"epoch": 0.8184050195507866,
	"grad_norm": 14.909830093383789,
	"learning_rate": 1.9693846620187936e-05,
	"loss": 1.0214,
	"step": 9000
	},
	{
	"epoch": 0.8638719650813859,
	"grad_norm": 7.522629261016846,
	"learning_rate": 1.893604122461352e-05,
	"loss": 1.0459,
	"step": 9500
	},
	{
	"epoch": 0.9093389106119851,
	"grad_norm": 47.28670883178711,
	"learning_rate": 1.8178235829039105e-05,
	"loss": 1.0277,
	"step": 10000
	},
	{
	"epoch": 0.9548058561425843,
	"grad_norm": 14.066143989562988,
	"learning_rate": 1.7420430433464688e-05,
	"loss": 1.0183,
	"step": 10500
	},
	{
	"epoch": 1.0002728016731837,
	"grad_norm": 10.882994651794434,
	"learning_rate": 1.6662625037890268e-05,
	"loss": 0.9878,
	"step": 11000
	},
	{
	"epoch": 1.0457397472037828,
	"grad_norm": 8.7605562210083,
	"learning_rate": 1.5904819642315854e-05,
	"loss": 0.8088,
	"step": 11500
	},
	{
	"epoch": 1.0912066927343822,
	"grad_norm": 5.1129326820373535,
	"learning_rate": 1.5147014246741437e-05,
	"loss": 0.8296,
	"step": 12000
	},
	{
	"epoch": 1.1366736382649814,
	"grad_norm": 11.089341163635254,
	"learning_rate": 1.438920885116702e-05,
	"loss": 0.8104,
	"step": 12500
	},
	{
	"epoch": 1.1821405837955807,
	"grad_norm": 10.97964096069336,
	"learning_rate": 1.3631403455592605e-05,
	"loss": 0.8171,
	"step": 13000
	},
	{
	"epoch": 1.22760752932618,
	"grad_norm": 21.10997200012207,
	"learning_rate": 1.2873598060018188e-05,
	"loss": 0.8129,
	"step": 13500
	},
	{
	"epoch": 1.273074474856779,
	"grad_norm": 1.2680716514587402,
	"learning_rate": 1.211579266444377e-05,
	"loss": 0.7884,
	"step": 14000
	},
	{
	"epoch": 1.3185414203873784,
	"grad_norm": 11.013956069946289,
	"learning_rate": 1.1357987268869355e-05,
	"loss": 0.8036,
	"step": 14500
	},
	{
	"epoch": 1.3640083659179776,
	"grad_norm": 4.082338333129883,
	"learning_rate": 1.0600181873294938e-05,
	"loss": 0.7772,
	"step": 15000
	},
	{
	"epoch": 1.409475311448577,
	"grad_norm": 5.8771071434021,
	"learning_rate": 9.842376477720523e-06,
	"loss": 0.8058,
	"step": 15500
	},
	{
	"epoch": 1.4549422569791761,
	"grad_norm": 10.076021194458008,
	"learning_rate": 9.084571082146104e-06,
	"loss": 0.7847,
	"step": 16000
	},
	{
	"epoch": 1.5004092025097755,
	"grad_norm": 7.645974636077881,
	"learning_rate": 8.326765686571689e-06,
	"loss": 0.7793,
	"step": 16500
	},
	{
	"epoch": 1.5458761480403747,
	"grad_norm": 15.3417329788208,
	"learning_rate": 7.568960290997272e-06,
	"loss": 0.7844,
	"step": 17000
	},
	{
	"epoch": 1.5913430935709738,
	"grad_norm": 6.472328186035156,
	"learning_rate": 6.8111548954228554e-06,
	"loss": 0.7767,
	"step": 17500
	},
	{
	"epoch": 1.6368100391015732,
	"grad_norm": 10.42813777923584,
	"learning_rate": 6.053349499848439e-06,
	"loss": 0.7792,
	"step": 18000
	},
	{
	"epoch": 1.6822769846321726,
	"grad_norm": 3.3688242435455322,
	"learning_rate": 5.295544104274023e-06,
	"loss": 0.7702,
	"step": 18500
	},
	{
	"epoch": 1.7277439301627715,
	"grad_norm": 5.880104064941406,
	"learning_rate": 4.537738708699606e-06,
	"loss": 0.8026,
	"step": 19000
	},
	{
	"epoch": 1.7732108756933709,
	"grad_norm": 20.736509323120117,
	"learning_rate": 3.7799333131251894e-06,
	"loss": 0.7483,
	"step": 19500
	},
	{
	"epoch": 1.8186778212239703,
	"grad_norm": 5.447836399078369,
	"learning_rate": 3.0221279175507728e-06,
	"loss": 0.7301,
	"step": 20000
	},
	{
	"epoch": 1.8641447667545694,
	"grad_norm": 6.6951141357421875,
	"learning_rate": 2.264322521976356e-06,
	"loss": 0.7655,
	"step": 20500
	},
	{
	"epoch": 1.9096117122851686,
	"grad_norm": 5.314289093017578,
	"learning_rate": 1.50651712640194e-06,
	"loss": 0.7631,
	"step": 21000
	},
	{
	"epoch": 1.955078657815768,
	"grad_norm": 9.385791778564453,
	"learning_rate": 7.487117308275235e-07,
	"loss": 0.744,
	"step": 21500
	},
	{
	"epoch": 2.0,
	"step": 21994,
	"total_flos": 1.2277795031512474e+17,
	"train_loss": 1.0133790219709247,
	"train_runtime": 87315.1804,
	"train_samples_per_second": 3.023,
	"train_steps_per_second": 0.252
	}
	],
	"logging_steps": 500,
	"max_steps": 21994,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 5000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.2277795031512474e+17,
	"train_batch_size": 12,
	"trial_name": null,
	"trial_params": null
	}