nerugm-lora-r8-2 / trainer_state.json
apwic's picture
End of training
7cb9ec2 verified
raw
history blame
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.7431385517120361,
"learning_rate": 4.75e-05,
"loss": 1.2665,
"step": 106
},
{
"epoch": 1.0,
"eval_accuracy": 0.8448836627470603,
"eval_f1": 0.0,
"eval_loss": 0.7136898040771484,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 0.9087,
"eval_samples_per_second": 205.786,
"eval_steps_per_second": 3.301,
"step": 106
},
{
"epoch": 2.0,
"grad_norm": 1.2968225479125977,
"learning_rate": 4.5e-05,
"loss": 0.713,
"step": 212
},
{
"epoch": 2.0,
"eval_accuracy": 0.8451338503877909,
"eval_f1": 0.0,
"eval_loss": 0.6074723601341248,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 0.9286,
"eval_samples_per_second": 201.374,
"eval_steps_per_second": 3.231,
"step": 212
},
{
"epoch": 3.0,
"grad_norm": 1.28258216381073,
"learning_rate": 4.25e-05,
"loss": 0.6346,
"step": 318
},
{
"epoch": 3.0,
"eval_accuracy": 0.8493870402802102,
"eval_f1": 0.022222222222222223,
"eval_loss": 0.5231208801269531,
"eval_precision": 0.19047619047619047,
"eval_recall": 0.011799410029498525,
"eval_runtime": 0.957,
"eval_samples_per_second": 195.406,
"eval_steps_per_second": 3.135,
"step": 318
},
{
"epoch": 4.0,
"grad_norm": 1.9985119104385376,
"learning_rate": 4e-05,
"loss": 0.5555,
"step": 424
},
{
"epoch": 4.0,
"eval_accuracy": 0.8656492369276958,
"eval_f1": 0.10501193317422433,
"eval_loss": 0.44577664136886597,
"eval_precision": 0.275,
"eval_recall": 0.06489675516224189,
"eval_runtime": 0.9294,
"eval_samples_per_second": 201.195,
"eval_steps_per_second": 3.228,
"step": 424
},
{
"epoch": 5.0,
"grad_norm": 1.3407347202301025,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4696,
"step": 530
},
{
"epoch": 5.0,
"eval_accuracy": 0.8949211908931699,
"eval_f1": 0.35859519408502777,
"eval_loss": 0.37153393030166626,
"eval_precision": 0.4801980198019802,
"eval_recall": 0.2861356932153392,
"eval_runtime": 0.9105,
"eval_samples_per_second": 205.39,
"eval_steps_per_second": 3.295,
"step": 530
},
{
"epoch": 6.0,
"grad_norm": 1.2348569631576538,
"learning_rate": 3.5e-05,
"loss": 0.3932,
"step": 636
},
{
"epoch": 6.0,
"eval_accuracy": 0.9194395796847635,
"eval_f1": 0.5402124430955993,
"eval_loss": 0.3133937120437622,
"eval_precision": 0.55625,
"eval_recall": 0.5250737463126843,
"eval_runtime": 0.8947,
"eval_samples_per_second": 209.001,
"eval_steps_per_second": 3.353,
"step": 636
},
{
"epoch": 7.0,
"grad_norm": 0.8620750904083252,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3299,
"step": 742
},
{
"epoch": 7.0,
"eval_accuracy": 0.9276957718288716,
"eval_f1": 0.6284916201117318,
"eval_loss": 0.2705799341201782,
"eval_precision": 0.596816976127321,
"eval_recall": 0.6637168141592921,
"eval_runtime": 0.8955,
"eval_samples_per_second": 208.832,
"eval_steps_per_second": 3.35,
"step": 742
},
{
"epoch": 8.0,
"grad_norm": 1.5275648832321167,
"learning_rate": 3e-05,
"loss": 0.2896,
"step": 848
},
{
"epoch": 8.0,
"eval_accuracy": 0.9339504628471353,
"eval_f1": 0.6711772665764547,
"eval_loss": 0.24331486225128174,
"eval_precision": 0.62,
"eval_recall": 0.7315634218289085,
"eval_runtime": 0.9165,
"eval_samples_per_second": 204.036,
"eval_steps_per_second": 3.273,
"step": 848
},
{
"epoch": 9.0,
"grad_norm": 1.4570423364639282,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2656,
"step": 954
},
{
"epoch": 9.0,
"eval_accuracy": 0.9354515886915187,
"eval_f1": 0.6923076923076923,
"eval_loss": 0.22765140235424042,
"eval_precision": 0.6289156626506024,
"eval_recall": 0.7699115044247787,
"eval_runtime": 0.9292,
"eval_samples_per_second": 201.246,
"eval_steps_per_second": 3.229,
"step": 954
},
{
"epoch": 10.0,
"grad_norm": 2.4647762775421143,
"learning_rate": 2.5e-05,
"loss": 0.2442,
"step": 1060
},
{
"epoch": 10.0,
"eval_accuracy": 0.9387040280210157,
"eval_f1": 0.7088948787061994,
"eval_loss": 0.20824101567268372,
"eval_precision": 0.652605459057072,
"eval_recall": 0.775811209439528,
"eval_runtime": 0.9229,
"eval_samples_per_second": 202.614,
"eval_steps_per_second": 3.25,
"step": 1060
},
{
"epoch": 11.0,
"grad_norm": 1.644608974456787,
"learning_rate": 2.25e-05,
"loss": 0.23,
"step": 1166
},
{
"epoch": 11.0,
"eval_accuracy": 0.9382036527395546,
"eval_f1": 0.7078947368421052,
"eval_loss": 0.2019660323858261,
"eval_precision": 0.6389548693586699,
"eval_recall": 0.7935103244837758,
"eval_runtime": 0.908,
"eval_samples_per_second": 205.937,
"eval_steps_per_second": 3.304,
"step": 1166
},
{
"epoch": 12.0,
"grad_norm": 2.0320627689361572,
"learning_rate": 2e-05,
"loss": 0.2229,
"step": 1272
},
{
"epoch": 12.0,
"eval_accuracy": 0.9384538403802852,
"eval_f1": 0.7220026350461134,
"eval_loss": 0.19768132269382477,
"eval_precision": 0.6523809523809524,
"eval_recall": 0.8082595870206489,
"eval_runtime": 0.9098,
"eval_samples_per_second": 205.544,
"eval_steps_per_second": 3.297,
"step": 1272
},
{
"epoch": 13.0,
"grad_norm": 1.1117591857910156,
"learning_rate": 1.75e-05,
"loss": 0.2132,
"step": 1378
},
{
"epoch": 13.0,
"eval_accuracy": 0.940205153865399,
"eval_f1": 0.7267904509283818,
"eval_loss": 0.18858253955841064,
"eval_precision": 0.6602409638554216,
"eval_recall": 0.8082595870206489,
"eval_runtime": 0.9058,
"eval_samples_per_second": 206.445,
"eval_steps_per_second": 3.312,
"step": 1378
},
{
"epoch": 14.0,
"grad_norm": 1.4440829753875732,
"learning_rate": 1.5e-05,
"loss": 0.2055,
"step": 1484
},
{
"epoch": 14.0,
"eval_accuracy": 0.9414560920690518,
"eval_f1": 0.7294751009421264,
"eval_loss": 0.18096885085105896,
"eval_precision": 0.6707920792079208,
"eval_recall": 0.799410029498525,
"eval_runtime": 0.9231,
"eval_samples_per_second": 202.575,
"eval_steps_per_second": 3.25,
"step": 1484
},
{
"epoch": 15.0,
"grad_norm": 0.746239423751831,
"learning_rate": 1.25e-05,
"loss": 0.2038,
"step": 1590
},
{
"epoch": 15.0,
"eval_accuracy": 0.9404553415061296,
"eval_f1": 0.7275132275132277,
"eval_loss": 0.18217705190181732,
"eval_precision": 0.6594724220623501,
"eval_recall": 0.8112094395280236,
"eval_runtime": 0.9121,
"eval_samples_per_second": 205.015,
"eval_steps_per_second": 3.289,
"step": 1590
},
{
"epoch": 16.0,
"grad_norm": 1.0166144371032715,
"learning_rate": 1e-05,
"loss": 0.2004,
"step": 1696
},
{
"epoch": 16.0,
"eval_accuracy": 0.9429572179134351,
"eval_f1": 0.7393617021276596,
"eval_loss": 0.17875301837921143,
"eval_precision": 0.6731234866828087,
"eval_recall": 0.8200589970501475,
"eval_runtime": 0.9135,
"eval_samples_per_second": 204.697,
"eval_steps_per_second": 3.284,
"step": 1696
},
{
"epoch": 17.0,
"grad_norm": 1.0570679903030396,
"learning_rate": 7.5e-06,
"loss": 0.1966,
"step": 1802
},
{
"epoch": 17.0,
"eval_accuracy": 0.9432074055541656,
"eval_f1": 0.7417218543046358,
"eval_loss": 0.1774715930223465,
"eval_precision": 0.6730769230769231,
"eval_recall": 0.8259587020648967,
"eval_runtime": 0.9125,
"eval_samples_per_second": 204.93,
"eval_steps_per_second": 3.288,
"step": 1802
},
{
"epoch": 18.0,
"grad_norm": 1.0107321739196777,
"learning_rate": 5e-06,
"loss": 0.1931,
"step": 1908
},
{
"epoch": 18.0,
"eval_accuracy": 0.9434575931948962,
"eval_f1": 0.7387862796833773,
"eval_loss": 0.17654407024383545,
"eval_precision": 0.6682577565632458,
"eval_recall": 0.8259587020648967,
"eval_runtime": 0.9156,
"eval_samples_per_second": 204.236,
"eval_steps_per_second": 3.277,
"step": 1908
},
{
"epoch": 19.0,
"grad_norm": 1.0605403184890747,
"learning_rate": 2.5e-06,
"loss": 0.1937,
"step": 2014
},
{
"epoch": 19.0,
"eval_accuracy": 0.9437077808356267,
"eval_f1": 0.7427055702917772,
"eval_loss": 0.17490608990192413,
"eval_precision": 0.6746987951807228,
"eval_recall": 0.8259587020648967,
"eval_runtime": 0.9207,
"eval_samples_per_second": 203.097,
"eval_steps_per_second": 3.258,
"step": 2014
},
{
"epoch": 20.0,
"grad_norm": 1.278860092163086,
"learning_rate": 0.0,
"loss": 0.1888,
"step": 2120
},
{
"epoch": 20.0,
"eval_accuracy": 0.9444583437578183,
"eval_f1": 0.7483355525965381,
"eval_loss": 0.17425791919231415,
"eval_precision": 0.6820388349514563,
"eval_recall": 0.8289085545722714,
"eval_runtime": 0.9189,
"eval_samples_per_second": 203.506,
"eval_steps_per_second": 3.265,
"step": 2120
},
{
"epoch": 20.0,
"step": 2120,
"total_flos": 901149122771520.0,
"train_loss": 0.36049160057643675,
"train_runtime": 244.1252,
"train_samples_per_second": 138.208,
"train_steps_per_second": 8.684
}
],
"logging_steps": 500,
"max_steps": 2120,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 901149122771520.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}