sentiment-lora-r4a1d0.1-0 / trainer_state.json
apwic's picture
End of training
7c28e88 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.2719526290893555,
"learning_rate": 4.75e-05,
"loss": 0.5617,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7192982456140351,
"eval_f1": 0.6543102914784331,
"eval_loss": 0.511669397354126,
"eval_precision": 0.6580196140461879,
"eval_recall": 0.6513911620294599,
"eval_runtime": 5.1602,
"eval_samples_per_second": 77.323,
"eval_steps_per_second": 9.69,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.8476994037628174,
"learning_rate": 4.5e-05,
"loss": 0.5046,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7418546365914787,
"eval_f1": 0.7111966887091448,
"eval_loss": 0.49168047308921814,
"eval_precision": 0.7042004048582996,
"eval_recall": 0.7323604291689398,
"eval_runtime": 5.0484,
"eval_samples_per_second": 79.035,
"eval_steps_per_second": 9.904,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.1617326736450195,
"learning_rate": 4.25e-05,
"loss": 0.4798,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7593984962406015,
"eval_f1": 0.7179215270413574,
"eval_loss": 0.4465886950492859,
"eval_precision": 0.7129198966408269,
"eval_recall": 0.7247681396617567,
"eval_runtime": 5.0834,
"eval_samples_per_second": 78.491,
"eval_steps_per_second": 9.836,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.705305337905884,
"learning_rate": 4e-05,
"loss": 0.4374,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8195488721804511,
"eval_f1": 0.7740798993394149,
"eval_loss": 0.3993551731109619,
"eval_precision": 0.7865881658357387,
"eval_recall": 0.7648208765230042,
"eval_runtime": 5.1044,
"eval_samples_per_second": 78.167,
"eval_steps_per_second": 9.795,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 0.9225200414657593,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4037,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7844611528822055,
"eval_f1": 0.7575187969924813,
"eval_loss": 0.41500648856163025,
"eval_precision": 0.748013422818792,
"eval_recall": 0.7800054555373704,
"eval_runtime": 5.0621,
"eval_samples_per_second": 78.822,
"eval_steps_per_second": 9.877,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.181605339050293,
"learning_rate": 3.5e-05,
"loss": 0.3741,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8049369344976196,
"eval_loss": 0.3736521899700165,
"eval_precision": 0.8027777777777778,
"eval_recall": 0.8072376795781051,
"eval_runtime": 5.0549,
"eval_samples_per_second": 78.934,
"eval_steps_per_second": 9.891,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.7816860675811768,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3574,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8220551378446115,
"eval_f1": 0.790906742443813,
"eval_loss": 0.3775876462459564,
"eval_precision": 0.7844931964944649,
"eval_recall": 0.7990998363338788,
"eval_runtime": 5.059,
"eval_samples_per_second": 78.869,
"eval_steps_per_second": 9.883,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 5.030299663543701,
"learning_rate": 3e-05,
"loss": 0.3387,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8134839254478557,
"eval_loss": 0.3653636872768402,
"eval_precision": 0.8119747899159664,
"eval_recall": 0.8150572831423895,
"eval_runtime": 5.1112,
"eval_samples_per_second": 78.064,
"eval_steps_per_second": 9.782,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.920233726501465,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3293,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8067969337812972,
"eval_loss": 0.3626542389392853,
"eval_precision": 0.8021114369501466,
"eval_recall": 0.8122385888343335,
"eval_runtime": 5.0586,
"eval_samples_per_second": 78.875,
"eval_steps_per_second": 9.884,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 6.010580539703369,
"learning_rate": 2.5e-05,
"loss": 0.3209,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8039756622954537,
"eval_loss": 0.35534289479255676,
"eval_precision": 0.8032299897460643,
"eval_recall": 0.8047372249499909,
"eval_runtime": 5.0627,
"eval_samples_per_second": 78.811,
"eval_steps_per_second": 9.876,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.613595485687256,
"learning_rate": 2.25e-05,
"loss": 0.2967,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.8051873113570456,
"eval_loss": 0.3674112856388092,
"eval_precision": 0.7989231125521075,
"eval_recall": 0.8129659938170577,
"eval_runtime": 5.0568,
"eval_samples_per_second": 78.903,
"eval_steps_per_second": 9.888,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 11.547273635864258,
"learning_rate": 2e-05,
"loss": 0.2928,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.8026866442779643,
"eval_loss": 0.37071213126182556,
"eval_precision": 0.795995733394834,
"eval_recall": 0.8111929441716675,
"eval_runtime": 5.0714,
"eval_samples_per_second": 78.677,
"eval_steps_per_second": 9.859,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.8734816312789917,
"learning_rate": 1.75e-05,
"loss": 0.2967,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8160386984618873,
"eval_loss": 0.3514226973056793,
"eval_precision": 0.8152632848784607,
"eval_recall": 0.8168303327877796,
"eval_runtime": 5.0516,
"eval_samples_per_second": 78.984,
"eval_steps_per_second": 9.898,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 11.493008613586426,
"learning_rate": 1.5e-05,
"loss": 0.2934,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8118502107020158,
"eval_loss": 0.3507131040096283,
"eval_precision": 0.8083091673078061,
"eval_recall": 0.8157846881251136,
"eval_runtime": 5.0578,
"eval_samples_per_second": 78.888,
"eval_steps_per_second": 9.886,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.0511583089828491,
"learning_rate": 1.25e-05,
"loss": 0.2811,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.8042838456507522,
"eval_loss": 0.35527709126472473,
"eval_precision": 0.7990802919708029,
"eval_recall": 0.8104655391889435,
"eval_runtime": 5.0928,
"eval_samples_per_second": 78.345,
"eval_steps_per_second": 9.818,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 7.683447360992432,
"learning_rate": 1e-05,
"loss": 0.2738,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8136136136136136,
"eval_loss": 0.35545966029167175,
"eval_precision": 0.8076923076923077,
"eval_recall": 0.820785597381342,
"eval_runtime": 5.1153,
"eval_samples_per_second": 78.002,
"eval_steps_per_second": 9.775,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 5.83898401260376,
"learning_rate": 7.5e-06,
"loss": 0.2717,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8203781512605042,
"eval_loss": 0.34679991006851196,
"eval_precision": 0.8174088828111065,
"eval_recall": 0.823604291689398,
"eval_runtime": 5.1026,
"eval_samples_per_second": 78.196,
"eval_steps_per_second": 9.799,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 4.768757343292236,
"learning_rate": 5e-06,
"loss": 0.278,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.8127416435111035,
"eval_loss": 0.3509637117385864,
"eval_precision": 0.8079618768328446,
"eval_recall": 0.8182851427532278,
"eval_runtime": 5.0773,
"eval_samples_per_second": 78.585,
"eval_steps_per_second": 9.848,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.281126022338867,
"learning_rate": 2.5e-06,
"loss": 0.2701,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8178232198860786,
"eval_loss": 0.34708452224731445,
"eval_precision": 0.8142125821151684,
"eval_recall": 0.8218312420440079,
"eval_runtime": 5.0544,
"eval_samples_per_second": 78.942,
"eval_steps_per_second": 9.892,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 7.043084621429443,
"learning_rate": 0.0,
"loss": 0.2722,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8152777777777778,
"eval_loss": 0.34833839535713196,
"eval_precision": 0.8110639802050195,
"eval_recall": 0.8200581923986179,
"eval_runtime": 5.0742,
"eval_samples_per_second": 78.633,
"eval_steps_per_second": 9.854,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7597037114448000.0,
"train_loss": 0.34670459247026286,
"train_runtime": 1953.5719,
"train_samples_per_second": 37.245,
"train_steps_per_second": 1.249
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7597037114448000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}