|
{ |
|
"best_metric": 0.9282445542742083, |
|
"best_model_checkpoint": "output/fine_tuned/t5-base/QNLI/checkpoint-6548", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 8185, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.30543677458766033, |
|
"grad_norm": 3.3223073482513428, |
|
"learning_rate": 4.6945632254123404e-05, |
|
"loss": 0.4673, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6108735491753207, |
|
"grad_norm": 3.269711494445801, |
|
"learning_rate": 4.389126450824679e-05, |
|
"loss": 0.3224, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.916310323762981, |
|
"grad_norm": 1.466129183769226, |
|
"learning_rate": 4.083689676237019e-05, |
|
"loss": 0.2856, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.914881933003844, |
|
"eval_loss": 0.22163903713226318, |
|
"eval_runtime": 10.4314, |
|
"eval_samples_per_second": 523.707, |
|
"eval_steps_per_second": 8.244, |
|
"step": 1637 |
|
}, |
|
{ |
|
"epoch": 1.2217470983506413, |
|
"grad_norm": 2.509833335876465, |
|
"learning_rate": 3.778252901649359e-05, |
|
"loss": 0.2448, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5271838729383018, |
|
"grad_norm": 3.2846474647521973, |
|
"learning_rate": 3.472816127061698e-05, |
|
"loss": 0.2259, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.832620647525962, |
|
"grad_norm": 2.8232882022857666, |
|
"learning_rate": 3.167379352474038e-05, |
|
"loss": 0.2258, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9220208676551346, |
|
"eval_loss": 0.20598259568214417, |
|
"eval_runtime": 9.5458, |
|
"eval_samples_per_second": 572.292, |
|
"eval_steps_per_second": 9.009, |
|
"step": 3274 |
|
}, |
|
{ |
|
"epoch": 2.1380574221136226, |
|
"grad_norm": 2.700801372528076, |
|
"learning_rate": 2.8619425778863777e-05, |
|
"loss": 0.1986, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.4434941967012827, |
|
"grad_norm": 1.884992241859436, |
|
"learning_rate": 2.556505803298717e-05, |
|
"loss": 0.1777, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.748930971288943, |
|
"grad_norm": 1.391169548034668, |
|
"learning_rate": 2.251069028711057e-05, |
|
"loss": 0.1791, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9276954054548783, |
|
"eval_loss": 0.20381644368171692, |
|
"eval_runtime": 9.2262, |
|
"eval_samples_per_second": 592.117, |
|
"eval_steps_per_second": 9.321, |
|
"step": 4911 |
|
}, |
|
{ |
|
"epoch": 3.0543677458766036, |
|
"grad_norm": 2.7919557094573975, |
|
"learning_rate": 1.9456322541233964e-05, |
|
"loss": 0.1724, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.359804520464264, |
|
"grad_norm": 2.4957516193389893, |
|
"learning_rate": 1.6401954795357362e-05, |
|
"loss": 0.1461, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.665241295051924, |
|
"grad_norm": 3.270733118057251, |
|
"learning_rate": 1.3347587049480758e-05, |
|
"loss": 0.1472, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.9706780696395847, |
|
"grad_norm": 4.12385368347168, |
|
"learning_rate": 1.0293219303604154e-05, |
|
"loss": 0.1476, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9282445542742083, |
|
"eval_loss": 0.2214784473180771, |
|
"eval_runtime": 9.3481, |
|
"eval_samples_per_second": 584.394, |
|
"eval_steps_per_second": 9.2, |
|
"step": 6548 |
|
}, |
|
{ |
|
"epoch": 4.276114844227245, |
|
"grad_norm": 3.3767876625061035, |
|
"learning_rate": 7.238851557727551e-06, |
|
"loss": 0.1305, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.581551618814905, |
|
"grad_norm": 2.924355983734131, |
|
"learning_rate": 4.184483811850947e-06, |
|
"loss": 0.1278, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.886988393402565, |
|
"grad_norm": 5.803096294403076, |
|
"learning_rate": 1.1301160659743434e-06, |
|
"loss": 0.1263, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9278784550613216, |
|
"eval_loss": 0.2300529181957245, |
|
"eval_runtime": 9.2683, |
|
"eval_samples_per_second": 589.431, |
|
"eval_steps_per_second": 9.279, |
|
"step": 8185 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 8185, |
|
"total_flos": 7.99682578684032e+16, |
|
"train_loss": 0.2058652666915679, |
|
"train_runtime": 2769.836, |
|
"train_samples_per_second": 189.078, |
|
"train_steps_per_second": 2.955 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8185, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.99682578684032e+16, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|