sentiment-pt-pl10-1 / trainer_state.json
apwic's picture
End of training
e58a69a verified
raw
history blame
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.3284077644348145,
"learning_rate": 4.75e-05,
"loss": 0.5535,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7268170426065163,
"eval_f1": 0.6326680574676724,
"eval_loss": 0.5077849626541138,
"eval_precision": 0.6605799373040753,
"eval_recall": 0.6242044008001455,
"eval_runtime": 5.0717,
"eval_samples_per_second": 78.673,
"eval_steps_per_second": 9.859,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.732577085494995,
"learning_rate": 4.5e-05,
"loss": 0.4682,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.8170426065162907,
"eval_f1": 0.777617444284111,
"eval_loss": 0.4184626042842865,
"eval_precision": 0.7798245614035089,
"eval_recall": 0.7755501000181851,
"eval_runtime": 5.0889,
"eval_samples_per_second": 78.406,
"eval_steps_per_second": 9.825,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.4716732501983643,
"learning_rate": 4.25e-05,
"loss": 0.3849,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8170426065162907,
"eval_f1": 0.757268931723293,
"eval_loss": 0.38087406754493713,
"eval_precision": 0.7968253968253969,
"eval_recall": 0.7380432805964721,
"eval_runtime": 5.1872,
"eval_samples_per_second": 76.92,
"eval_steps_per_second": 9.639,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 1.9364979267120361,
"learning_rate": 4e-05,
"loss": 0.3127,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.8289446964056049,
"eval_loss": 0.32795679569244385,
"eval_precision": 0.8266129032258065,
"eval_recall": 0.8314238952536825,
"eval_runtime": 5.0897,
"eval_samples_per_second": 78.394,
"eval_steps_per_second": 9.824,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 3.656118392944336,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2869,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8341332527115377,
"eval_loss": 0.3168599307537079,
"eval_precision": 0.8333132275770553,
"eval_recall": 0.8349699945444626,
"eval_runtime": 5.064,
"eval_samples_per_second": 78.791,
"eval_steps_per_second": 9.874,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.3921332359313965,
"learning_rate": 3.5e-05,
"loss": 0.274,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8576006759069754,
"eval_loss": 0.3217551112174988,
"eval_precision": 0.8466769923965081,
"eval_recall": 0.8731132933260592,
"eval_runtime": 5.0567,
"eval_samples_per_second": 78.906,
"eval_steps_per_second": 9.888,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.9414736032485962,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2539,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8417152566223307,
"eval_loss": 0.30381932854652405,
"eval_precision": 0.8378262413446174,
"eval_recall": 0.8460174577195854,
"eval_runtime": 5.0791,
"eval_samples_per_second": 78.557,
"eval_steps_per_second": 9.844,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 2.456129789352417,
"learning_rate": 3e-05,
"loss": 0.2286,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8341632880321839,
"eval_loss": 0.32023322582244873,
"eval_precision": 0.8479139504563233,
"eval_recall": 0.8235133660665576,
"eval_runtime": 5.2736,
"eval_samples_per_second": 75.66,
"eval_steps_per_second": 9.481,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.962612152099609,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2249,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8662440310793597,
"eval_loss": 0.2973268926143646,
"eval_precision": 0.8606158357771261,
"eval_recall": 0.872704128023277,
"eval_runtime": 5.05,
"eval_samples_per_second": 79.009,
"eval_steps_per_second": 9.901,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 3.1556286811828613,
"learning_rate": 2.5e-05,
"loss": 0.2083,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8721804511278195,
"eval_f1": 0.8376560692488731,
"eval_loss": 0.3127811551094055,
"eval_precision": 0.8602278120550546,
"eval_recall": 0.8220585561011093,
"eval_runtime": 5.0596,
"eval_samples_per_second": 78.86,
"eval_steps_per_second": 9.882,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 0.8982645273208618,
"learning_rate": 2.25e-05,
"loss": 0.1935,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8721887408091659,
"eval_loss": 0.2957092523574829,
"eval_precision": 0.866466275659824,
"eval_recall": 0.8787506819421713,
"eval_runtime": 5.0454,
"eval_samples_per_second": 79.082,
"eval_steps_per_second": 9.91,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 6.364419460296631,
"learning_rate": 2e-05,
"loss": 0.1859,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8602993213495533,
"eval_loss": 0.2869341969490051,
"eval_precision": 0.8547653958944281,
"eval_recall": 0.8666575741043827,
"eval_runtime": 5.0666,
"eval_samples_per_second": 78.751,
"eval_steps_per_second": 9.869,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.9051835536956787,
"learning_rate": 1.75e-05,
"loss": 0.1735,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8502252252252251,
"eval_loss": 0.30611610412597656,
"eval_precision": 0.863265306122449,
"eval_recall": 0.8398799781778505,
"eval_runtime": 5.1317,
"eval_samples_per_second": 77.752,
"eval_steps_per_second": 9.743,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 6.753292083740234,
"learning_rate": 1.5e-05,
"loss": 0.1804,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8695225637671682,
"eval_loss": 0.29550090432167053,
"eval_precision": 0.8631532846715328,
"eval_recall": 0.8769776322967813,
"eval_runtime": 5.0486,
"eval_samples_per_second": 79.032,
"eval_steps_per_second": 9.904,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.1921367347240448,
"learning_rate": 1.25e-05,
"loss": 0.1628,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8757339815412664,
"eval_loss": 0.2972831130027771,
"eval_precision": 0.8766906299500427,
"eval_recall": 0.8747954173486088,
"eval_runtime": 5.0606,
"eval_samples_per_second": 78.844,
"eval_steps_per_second": 9.88,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 0.38073158264160156,
"learning_rate": 1e-05,
"loss": 0.1619,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8707140332272888,
"eval_loss": 0.3023494482040405,
"eval_precision": 0.8618432385874246,
"eval_recall": 0.8819785415530097,
"eval_runtime": 5.0599,
"eval_samples_per_second": 78.856,
"eval_steps_per_second": 9.882,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 1.6226332187652588,
"learning_rate": 7.5e-06,
"loss": 0.1514,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8775533117267087,
"eval_loss": 0.2997310757637024,
"eval_precision": 0.873246730188791,
"eval_recall": 0.8822967812329514,
"eval_runtime": 5.0625,
"eval_samples_per_second": 78.815,
"eval_steps_per_second": 9.877,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 4.088443756103516,
"learning_rate": 5e-06,
"loss": 0.1503,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8736504011098378,
"eval_loss": 0.3002457618713379,
"eval_precision": 0.8718487394957983,
"eval_recall": 0.8755228223313329,
"eval_runtime": 5.0624,
"eval_samples_per_second": 78.817,
"eval_steps_per_second": 9.877,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 6.321498394012451,
"learning_rate": 2.5e-06,
"loss": 0.154,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8730223677032187,
"eval_loss": 0.303114652633667,
"eval_precision": 0.8730223677032187,
"eval_recall": 0.8730223677032187,
"eval_runtime": 5.2549,
"eval_samples_per_second": 75.929,
"eval_steps_per_second": 9.515,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 1.89798903465271,
"learning_rate": 0.0,
"loss": 0.1408,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8763538792940554,
"eval_loss": 0.30106595158576965,
"eval_precision": 0.8754297605404427,
"eval_recall": 0.877295871976723,
"eval_runtime": 5.0597,
"eval_samples_per_second": 78.858,
"eval_steps_per_second": 9.882,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.24252970335913487,
"train_runtime": 1919.2772,
"train_samples_per_second": 37.91,
"train_steps_per_second": 1.271
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}