|
{
|
|
"best_metric": 0.9906542056074766,
|
|
"best_model_checkpoint": "resnet-50-finetuned-FBark\\checkpoint-198",
|
|
"epoch": 34.339622641509436,
|
|
"eval_steps": 500,
|
|
"global_step": 455,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.98,
|
|
"step": 13,
|
|
"train_accuracy": 0.18867924528301888,
|
|
"train_f1": 0.07866239279216843,
|
|
"train_loss": 1.6040071249008179,
|
|
"train_precision": 0.10334148329258355,
|
|
"train_recall": 0.20698380566801616,
|
|
"train_runtime": 132.5495,
|
|
"train_samples_per_second": 3.199,
|
|
"train_steps_per_second": 0.4
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"grad_norm": 0.8319346904754639,
|
|
"learning_rate": 8.478260869565217e-05,
|
|
"loss": 1.6424,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"eval_accuracy": 0.2336448598130841,
|
|
"eval_f1": 0.10987810004203447,
|
|
"eval_loss": 1.5959796905517578,
|
|
"eval_precision": 0.1749174917491749,
|
|
"eval_recall": 0.22424242424242422,
|
|
"eval_runtime": 39.1296,
|
|
"eval_samples_per_second": 2.735,
|
|
"eval_steps_per_second": 0.358,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"step": 26,
|
|
"train_accuracy": 0.33962264150943394,
|
|
"train_f1": 0.24495658674026793,
|
|
"train_loss": 1.5701098442077637,
|
|
"train_precision": 0.3021095248242063,
|
|
"train_recall": 0.3113901059286784,
|
|
"train_runtime": 127.6225,
|
|
"train_samples_per_second": 3.322,
|
|
"train_steps_per_second": 0.415
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"grad_norm": 1.3721247911453247,
|
|
"learning_rate": 0.00016956521739130433,
|
|
"loss": 1.621,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"eval_accuracy": 0.411214953271028,
|
|
"eval_f1": 0.26165216896924215,
|
|
"eval_loss": 1.546158790588379,
|
|
"eval_precision": 0.3088888888888889,
|
|
"eval_recall": 0.3116883116883117,
|
|
"eval_runtime": 39.3684,
|
|
"eval_samples_per_second": 2.718,
|
|
"eval_steps_per_second": 0.356,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"step": 39,
|
|
"train_accuracy": 0.37264150943396224,
|
|
"train_f1": 0.2637971738836966,
|
|
"train_loss": 1.5183203220367432,
|
|
"train_precision": 0.35881109762129587,
|
|
"train_recall": 0.34996256447229773,
|
|
"train_runtime": 132.7347,
|
|
"train_samples_per_second": 3.194,
|
|
"train_steps_per_second": 0.399
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"grad_norm": 1.4928213357925415,
|
|
"learning_rate": 0.00025434782608695647,
|
|
"loss": 1.567,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"eval_accuracy": 0.4766355140186916,
|
|
"eval_f1": 0.30495432955791013,
|
|
"eval_loss": 1.4607292413711548,
|
|
"eval_precision": 0.3638922888616891,
|
|
"eval_recall": 0.3748917748917749,
|
|
"eval_runtime": 41.5534,
|
|
"eval_samples_per_second": 2.575,
|
|
"eval_steps_per_second": 0.337,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 53,
|
|
"train_accuracy": 0.4386792452830189,
|
|
"train_f1": 0.3491466500711846,
|
|
"train_loss": 1.346737027168274,
|
|
"train_precision": 0.7565600797484855,
|
|
"train_recall": 0.41960074270933767,
|
|
"train_runtime": 137.4133,
|
|
"train_samples_per_second": 3.086,
|
|
"train_steps_per_second": 0.386
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 1.9480384588241577,
|
|
"learning_rate": 0.00029486552567237163,
|
|
"loss": 1.357,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": 0.514018691588785,
|
|
"eval_f1": 0.38211575211575216,
|
|
"eval_loss": 1.2584657669067383,
|
|
"eval_precision": 0.8378205128205127,
|
|
"eval_recall": 0.4251082251082251,
|
|
"eval_runtime": 43.051,
|
|
"eval_samples_per_second": 2.485,
|
|
"eval_steps_per_second": 0.325,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"step": 66,
|
|
"train_accuracy": 0.6957547169811321,
|
|
"train_f1": 0.6891574214359025,
|
|
"train_loss": 1.138260841369629,
|
|
"train_precision": 0.8253012477718361,
|
|
"train_recall": 0.6805251227537071,
|
|
"train_runtime": 126.2306,
|
|
"train_samples_per_second": 3.359,
|
|
"train_steps_per_second": 0.42
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"grad_norm": 1.9529035091400146,
|
|
"learning_rate": 0.0002853300733496332,
|
|
"loss": 1.3203,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"eval_accuracy": 0.7476635514018691,
|
|
"eval_f1": 0.7225305110805734,
|
|
"eval_loss": 1.0523829460144043,
|
|
"eval_precision": 0.8176507936507935,
|
|
"eval_recall": 0.7064935064935065,
|
|
"eval_runtime": 39.1299,
|
|
"eval_samples_per_second": 2.734,
|
|
"eval_steps_per_second": 0.358,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 5.96,
|
|
"step": 79,
|
|
"train_accuracy": 0.7594339622641509,
|
|
"train_f1": 0.7384580979394952,
|
|
"train_loss": 0.8829485774040222,
|
|
"train_precision": 0.86050056869729,
|
|
"train_recall": 0.7432874367985035,
|
|
"train_runtime": 133.9565,
|
|
"train_samples_per_second": 3.165,
|
|
"train_steps_per_second": 0.396
|
|
},
|
|
{
|
|
"epoch": 5.96,
|
|
"grad_norm": 2.4536993503570557,
|
|
"learning_rate": 0.00027579462102689484,
|
|
"loss": 1.1706,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 5.96,
|
|
"eval_accuracy": 0.794392523364486,
|
|
"eval_f1": 0.7846877954646693,
|
|
"eval_loss": 0.8008124828338623,
|
|
"eval_precision": 0.8854145854145855,
|
|
"eval_recall": 0.767965367965368,
|
|
"eval_runtime": 40.731,
|
|
"eval_samples_per_second": 2.627,
|
|
"eval_steps_per_second": 0.344,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 6.94,
|
|
"step": 92,
|
|
"train_accuracy": 0.8632075471698113,
|
|
"train_f1": 0.8590685733770105,
|
|
"train_loss": 0.7464644312858582,
|
|
"train_precision": 0.892835269329224,
|
|
"train_recall": 0.8548507199297516,
|
|
"train_runtime": 130.0886,
|
|
"train_samples_per_second": 3.259,
|
|
"train_steps_per_second": 0.407
|
|
},
|
|
{
|
|
"epoch": 6.94,
|
|
"grad_norm": 2.5925121307373047,
|
|
"learning_rate": 0.00026625916870415647,
|
|
"loss": 0.9929,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 6.94,
|
|
"eval_accuracy": 0.9158878504672897,
|
|
"eval_f1": 0.9236075036075035,
|
|
"eval_loss": 0.6253050565719604,
|
|
"eval_precision": 0.9368429298864083,
|
|
"eval_recall": 0.9212121212121211,
|
|
"eval_runtime": 38.4775,
|
|
"eval_samples_per_second": 2.781,
|
|
"eval_steps_per_second": 0.364,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"step": 106,
|
|
"train_accuracy": 0.9080188679245284,
|
|
"train_f1": 0.9066600370668294,
|
|
"train_loss": 0.5274814963340759,
|
|
"train_precision": 0.9124982372811825,
|
|
"train_recall": 0.9042126570890489,
|
|
"train_runtime": 129.4721,
|
|
"train_samples_per_second": 3.275,
|
|
"train_steps_per_second": 0.409
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"grad_norm": 2.86348032951355,
|
|
"learning_rate": 0.0002559902200488997,
|
|
"loss": 0.7633,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_accuracy": 0.9345794392523364,
|
|
"eval_f1": 0.9342456932845948,
|
|
"eval_loss": 0.46806150674819946,
|
|
"eval_precision": 0.9339420289855072,
|
|
"eval_recall": 0.9437229437229437,
|
|
"eval_runtime": 39.4048,
|
|
"eval_samples_per_second": 2.715,
|
|
"eval_steps_per_second": 0.355,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 8.98,
|
|
"step": 119,
|
|
"train_accuracy": 0.9363207547169812,
|
|
"train_f1": 0.9358357324202157,
|
|
"train_loss": 0.46290820837020874,
|
|
"train_precision": 0.938202392067757,
|
|
"train_recall": 0.9342575372358033,
|
|
"train_runtime": 133.0687,
|
|
"train_samples_per_second": 3.186,
|
|
"train_steps_per_second": 0.398
|
|
},
|
|
{
|
|
"epoch": 8.98,
|
|
"grad_norm": 3.642646074295044,
|
|
"learning_rate": 0.00024645476772616135,
|
|
"loss": 0.6367,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 8.98,
|
|
"eval_accuracy": 0.9158878504672897,
|
|
"eval_f1": 0.9145165945165944,
|
|
"eval_loss": 0.3800387382507324,
|
|
"eval_precision": 0.912056277056277,
|
|
"eval_recall": 0.9194805194805195,
|
|
"eval_runtime": 48.5414,
|
|
"eval_samples_per_second": 2.204,
|
|
"eval_steps_per_second": 0.288,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 9.96,
|
|
"step": 132,
|
|
"train_accuracy": 0.9410377358490566,
|
|
"train_f1": 0.9413105716667156,
|
|
"train_loss": 0.36001139879226685,
|
|
"train_precision": 0.941837807815579,
|
|
"train_recall": 0.9410602738360391,
|
|
"train_runtime": 133.0768,
|
|
"train_samples_per_second": 3.186,
|
|
"train_steps_per_second": 0.398
|
|
},
|
|
{
|
|
"epoch": 9.96,
|
|
"grad_norm": 2.934262275695801,
|
|
"learning_rate": 0.00023691931540342298,
|
|
"loss": 0.5834,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 9.96,
|
|
"eval_accuracy": 0.9532710280373832,
|
|
"eval_f1": 0.9551136455716718,
|
|
"eval_loss": 0.26904991269111633,
|
|
"eval_precision": 0.959457478005865,
|
|
"eval_recall": 0.9541125541125541,
|
|
"eval_runtime": 42.7878,
|
|
"eval_samples_per_second": 2.501,
|
|
"eval_steps_per_second": 0.327,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 10.94,
|
|
"step": 145,
|
|
"train_accuracy": 0.9504716981132075,
|
|
"train_f1": 0.9503020748526174,
|
|
"train_loss": 0.25279200077056885,
|
|
"train_precision": 0.9508748114630468,
|
|
"train_recall": 0.9503638914618925,
|
|
"train_runtime": 134.3359,
|
|
"train_samples_per_second": 3.156,
|
|
"train_steps_per_second": 0.395
|
|
},
|
|
{
|
|
"epoch": 10.94,
|
|
"grad_norm": 3.635103464126587,
|
|
"learning_rate": 0.00022738386308068459,
|
|
"loss": 0.4842,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 10.94,
|
|
"eval_accuracy": 0.9813084112149533,
|
|
"eval_f1": 0.984659090909091,
|
|
"eval_loss": 0.16999471187591553,
|
|
"eval_precision": 0.9826086956521738,
|
|
"eval_recall": 0.9878787878787879,
|
|
"eval_runtime": 49.1618,
|
|
"eval_samples_per_second": 2.176,
|
|
"eval_steps_per_second": 0.285,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"step": 159,
|
|
"train_accuracy": 0.9693396226415094,
|
|
"train_f1": 0.9695757850179305,
|
|
"train_loss": 0.1881800889968872,
|
|
"train_precision": 0.9693384564611929,
|
|
"train_recall": 0.9709818221559601,
|
|
"train_runtime": 128.8721,
|
|
"train_samples_per_second": 3.29,
|
|
"train_steps_per_second": 0.411
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"grad_norm": 2.419642925262451,
|
|
"learning_rate": 0.00021711491442542784,
|
|
"loss": 0.4302,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_accuracy": 0.9626168224299065,
|
|
"eval_f1": 0.9676432095036744,
|
|
"eval_loss": 0.17427879571914673,
|
|
"eval_precision": 0.9648221343873518,
|
|
"eval_recall": 0.9722943722943723,
|
|
"eval_runtime": 41.3417,
|
|
"eval_samples_per_second": 2.588,
|
|
"eval_steps_per_second": 0.339,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 12.98,
|
|
"step": 172,
|
|
"train_accuracy": 0.964622641509434,
|
|
"train_f1": 0.9651368628644732,
|
|
"train_loss": 0.1646902710199356,
|
|
"train_precision": 0.9645891898165841,
|
|
"train_recall": 0.9665469535253202,
|
|
"train_runtime": 134.1065,
|
|
"train_samples_per_second": 3.162,
|
|
"train_steps_per_second": 0.395
|
|
},
|
|
{
|
|
"epoch": 12.98,
|
|
"grad_norm": 4.919209003448486,
|
|
"learning_rate": 0.00020757946210268947,
|
|
"loss": 0.4422,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 12.98,
|
|
"eval_accuracy": 0.9719626168224299,
|
|
"eval_f1": 0.9771428571428572,
|
|
"eval_loss": 0.13857078552246094,
|
|
"eval_precision": 0.975,
|
|
"eval_recall": 0.9818181818181818,
|
|
"eval_runtime": 39.4778,
|
|
"eval_samples_per_second": 2.71,
|
|
"eval_steps_per_second": 0.355,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 13.96,
|
|
"step": 185,
|
|
"train_accuracy": 0.9716981132075472,
|
|
"train_f1": 0.9709900945487153,
|
|
"train_loss": 0.13838660717010498,
|
|
"train_precision": 0.9710717151425976,
|
|
"train_recall": 0.9710504067284639,
|
|
"train_runtime": 130.0755,
|
|
"train_samples_per_second": 3.26,
|
|
"train_steps_per_second": 0.407
|
|
},
|
|
{
|
|
"epoch": 13.96,
|
|
"grad_norm": 4.021721839904785,
|
|
"learning_rate": 0.0001980440097799511,
|
|
"loss": 0.4237,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 13.96,
|
|
"eval_accuracy": 0.9626168224299065,
|
|
"eval_f1": 0.9697054698457223,
|
|
"eval_loss": 0.12292856723070145,
|
|
"eval_precision": 0.968,
|
|
"eval_recall": 0.9757575757575758,
|
|
"eval_runtime": 39.3533,
|
|
"eval_samples_per_second": 2.719,
|
|
"eval_steps_per_second": 0.356,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 14.94,
|
|
"step": 198,
|
|
"train_accuracy": 0.9716981132075472,
|
|
"train_f1": 0.9707363445629333,
|
|
"train_loss": 0.15636524558067322,
|
|
"train_precision": 0.9719248605013513,
|
|
"train_recall": 0.9701030873944789,
|
|
"train_runtime": 128.9206,
|
|
"train_samples_per_second": 3.289,
|
|
"train_steps_per_second": 0.411
|
|
},
|
|
{
|
|
"epoch": 14.94,
|
|
"grad_norm": 2.6144134998321533,
|
|
"learning_rate": 0.00018850855745721268,
|
|
"loss": 0.367,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 14.94,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.1049351617693901,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.5629,
|
|
"eval_samples_per_second": 2.705,
|
|
"eval_steps_per_second": 0.354,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"step": 212,
|
|
"train_accuracy": 0.9858490566037735,
|
|
"train_f1": 0.9859794210341276,
|
|
"train_loss": 0.11216574162244797,
|
|
"train_precision": 0.9859128049064834,
|
|
"train_recall": 0.986104018607261,
|
|
"train_runtime": 132.2515,
|
|
"train_samples_per_second": 3.206,
|
|
"train_steps_per_second": 0.401
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"grad_norm": 2.7048161029815674,
|
|
"learning_rate": 0.000178239608801956,
|
|
"loss": 0.4376,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.08710027486085892,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 42.0413,
|
|
"eval_samples_per_second": 2.545,
|
|
"eval_steps_per_second": 0.333,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 16.98,
|
|
"step": 225,
|
|
"train_accuracy": 0.9787735849056604,
|
|
"train_f1": 0.978423027691737,
|
|
"train_loss": 0.10880015045404434,
|
|
"train_precision": 0.9783562367864693,
|
|
"train_recall": 0.9791313538827833,
|
|
"train_runtime": 131.0592,
|
|
"train_samples_per_second": 3.235,
|
|
"train_steps_per_second": 0.404
|
|
},
|
|
{
|
|
"epoch": 16.98,
|
|
"grad_norm": 2.9348771572113037,
|
|
"learning_rate": 0.0001687041564792176,
|
|
"loss": 0.3638,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 16.98,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.07979033887386322,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 38.9013,
|
|
"eval_samples_per_second": 2.751,
|
|
"eval_steps_per_second": 0.36,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 17.96,
|
|
"step": 238,
|
|
"train_accuracy": 0.9811320754716981,
|
|
"train_f1": 0.9813568397733909,
|
|
"train_loss": 0.12247739732265472,
|
|
"train_precision": 0.9824647159390165,
|
|
"train_recall": 0.9805006998510924,
|
|
"train_runtime": 135.9878,
|
|
"train_samples_per_second": 3.118,
|
|
"train_steps_per_second": 0.39
|
|
},
|
|
{
|
|
"epoch": 17.96,
|
|
"grad_norm": 2.9127988815307617,
|
|
"learning_rate": 0.0001591687041564792,
|
|
"loss": 0.3758,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 17.96,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.05758798122406006,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 40.1837,
|
|
"eval_samples_per_second": 2.663,
|
|
"eval_steps_per_second": 0.348,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 18.94,
|
|
"step": 251,
|
|
"train_accuracy": 0.9858490566037735,
|
|
"train_f1": 0.9849505768779323,
|
|
"train_loss": 0.08141080290079117,
|
|
"train_precision": 0.9856198097123687,
|
|
"train_recall": 0.9845945870999945,
|
|
"train_runtime": 131.3411,
|
|
"train_samples_per_second": 3.228,
|
|
"train_steps_per_second": 0.404
|
|
},
|
|
{
|
|
"epoch": 18.94,
|
|
"grad_norm": 2.887089252471924,
|
|
"learning_rate": 0.00014963325183374083,
|
|
"loss": 0.2759,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 18.94,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.06044730544090271,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 56.9665,
|
|
"eval_samples_per_second": 1.878,
|
|
"eval_steps_per_second": 0.246,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"step": 265,
|
|
"train_accuracy": 0.9834905660377359,
|
|
"train_f1": 0.9841577997732367,
|
|
"train_loss": 0.09728587418794632,
|
|
"train_precision": 0.9841397108638489,
|
|
"train_recall": 0.9842617289830912,
|
|
"train_runtime": 131.2678,
|
|
"train_samples_per_second": 3.23,
|
|
"train_steps_per_second": 0.404
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"grad_norm": 2.2641186714172363,
|
|
"learning_rate": 0.00013936430317848408,
|
|
"loss": 0.3212,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"eval_accuracy": 0.9813084112149533,
|
|
"eval_f1": 0.984659090909091,
|
|
"eval_loss": 0.09081904590129852,
|
|
"eval_precision": 0.9826086956521738,
|
|
"eval_recall": 0.9878787878787879,
|
|
"eval_runtime": 39.7053,
|
|
"eval_samples_per_second": 2.695,
|
|
"eval_steps_per_second": 0.353,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 20.98,
|
|
"step": 278,
|
|
"train_accuracy": 0.9740566037735849,
|
|
"train_f1": 0.9730482239763667,
|
|
"train_loss": 0.12885905802249908,
|
|
"train_precision": 0.9755676794807229,
|
|
"train_recall": 0.9715263957551142,
|
|
"train_runtime": 129.4684,
|
|
"train_samples_per_second": 3.275,
|
|
"train_steps_per_second": 0.409
|
|
},
|
|
{
|
|
"epoch": 20.98,
|
|
"grad_norm": 3.7218384742736816,
|
|
"learning_rate": 0.00012982885085574571,
|
|
"loss": 0.3215,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 20.98,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.08540945500135422,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.1648,
|
|
"eval_samples_per_second": 2.732,
|
|
"eval_steps_per_second": 0.357,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 21.96,
|
|
"step": 291,
|
|
"train_accuracy": 0.9882075471698113,
|
|
"train_f1": 0.987640276713749,
|
|
"train_loss": 0.08325836062431335,
|
|
"train_precision": 0.9890280836661814,
|
|
"train_recall": 0.9866258111031001,
|
|
"train_runtime": 124.9541,
|
|
"train_samples_per_second": 3.393,
|
|
"train_steps_per_second": 0.424
|
|
},
|
|
{
|
|
"epoch": 21.96,
|
|
"grad_norm": 5.685765743255615,
|
|
"learning_rate": 0.00012029339853300733,
|
|
"loss": 0.3545,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 21.96,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.07166730612516403,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.2775,
|
|
"eval_samples_per_second": 2.724,
|
|
"eval_steps_per_second": 0.356,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 22.94,
|
|
"step": 304,
|
|
"train_accuracy": 0.9834905660377359,
|
|
"train_f1": 0.9840178843462146,
|
|
"train_loss": 0.09864702820777893,
|
|
"train_precision": 0.9840027079631041,
|
|
"train_recall": 0.9844752990764272,
|
|
"train_runtime": 132.2941,
|
|
"train_samples_per_second": 3.205,
|
|
"train_steps_per_second": 0.401
|
|
},
|
|
{
|
|
"epoch": 22.94,
|
|
"grad_norm": 4.6724138259887695,
|
|
"learning_rate": 0.00011075794621026893,
|
|
"loss": 0.3085,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 22.94,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.08209435641765594,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.8399,
|
|
"eval_samples_per_second": 2.686,
|
|
"eval_steps_per_second": 0.351,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"step": 318,
|
|
"train_accuracy": 0.9858490566037735,
|
|
"train_f1": 0.9856773749159446,
|
|
"train_loss": 0.08332642912864685,
|
|
"train_precision": 0.9871632432676922,
|
|
"train_recall": 0.9844707203986204,
|
|
"train_runtime": 130.1118,
|
|
"train_samples_per_second": 3.259,
|
|
"train_steps_per_second": 0.407
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"grad_norm": 4.654536724090576,
|
|
"learning_rate": 0.00010048899755501222,
|
|
"loss": 0.2637,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.0654672160744667,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.6356,
|
|
"eval_samples_per_second": 2.7,
|
|
"eval_steps_per_second": 0.353,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 24.98,
|
|
"step": 331,
|
|
"train_accuracy": 0.9858490566037735,
|
|
"train_f1": 0.986589461820231,
|
|
"train_loss": 0.07109413295984268,
|
|
"train_precision": 0.9871650821089023,
|
|
"train_recall": 0.98621216568729,
|
|
"train_runtime": 128.7284,
|
|
"train_samples_per_second": 3.294,
|
|
"train_steps_per_second": 0.412
|
|
},
|
|
{
|
|
"epoch": 24.98,
|
|
"grad_norm": 4.322335243225098,
|
|
"learning_rate": 9.095354523227383e-05,
|
|
"loss": 0.2723,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 24.98,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.06855478882789612,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 38.4718,
|
|
"eval_samples_per_second": 2.781,
|
|
"eval_steps_per_second": 0.364,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 25.96,
|
|
"step": 344,
|
|
"train_accuracy": 0.9764150943396226,
|
|
"train_f1": 0.9756975326292678,
|
|
"train_loss": 0.09106432646512985,
|
|
"train_precision": 0.9760480054398313,
|
|
"train_recall": 0.9753730488175062,
|
|
"train_runtime": 132.4151,
|
|
"train_samples_per_second": 3.202,
|
|
"train_steps_per_second": 0.4
|
|
},
|
|
{
|
|
"epoch": 25.96,
|
|
"grad_norm": 4.0115766525268555,
|
|
"learning_rate": 8.141809290953544e-05,
|
|
"loss": 0.36,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 25.96,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.0726209431886673,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 40.7509,
|
|
"eval_samples_per_second": 2.626,
|
|
"eval_steps_per_second": 0.344,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 26.94,
|
|
"step": 357,
|
|
"train_accuracy": 0.9740566037735849,
|
|
"train_f1": 0.9741101148833959,
|
|
"train_loss": 0.08721727132797241,
|
|
"train_precision": 0.9743783993783992,
|
|
"train_recall": 0.9741037543564772,
|
|
"train_runtime": 133.2307,
|
|
"train_samples_per_second": 3.182,
|
|
"train_steps_per_second": 0.398
|
|
},
|
|
{
|
|
"epoch": 26.94,
|
|
"grad_norm": 2.0709145069122314,
|
|
"learning_rate": 7.188264058679705e-05,
|
|
"loss": 0.2535,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 26.94,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.06701695173978806,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.1128,
|
|
"eval_samples_per_second": 2.736,
|
|
"eval_steps_per_second": 0.358,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"step": 371,
|
|
"train_accuracy": 0.9811320754716981,
|
|
"train_f1": 0.9811807715855693,
|
|
"train_loss": 0.08411888033151627,
|
|
"train_precision": 0.9816060153438795,
|
|
"train_recall": 0.9810544308058603,
|
|
"train_runtime": 138.1723,
|
|
"train_samples_per_second": 3.069,
|
|
"train_steps_per_second": 0.384
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"grad_norm": 2.612853527069092,
|
|
"learning_rate": 6.161369193154034e-05,
|
|
"loss": 0.2551,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.0589648000895977,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.9978,
|
|
"eval_samples_per_second": 2.675,
|
|
"eval_steps_per_second": 0.35,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 28.98,
|
|
"step": 384,
|
|
"train_accuracy": 0.9764150943396226,
|
|
"train_f1": 0.9760814191422504,
|
|
"train_loss": 0.08149362355470657,
|
|
"train_precision": 0.9773233573176615,
|
|
"train_recall": 0.9755777028177874,
|
|
"train_runtime": 158.8587,
|
|
"train_samples_per_second": 2.669,
|
|
"train_steps_per_second": 0.334
|
|
},
|
|
{
|
|
"epoch": 28.98,
|
|
"grad_norm": 3.303999662399292,
|
|
"learning_rate": 5.207823960880195e-05,
|
|
"loss": 0.3202,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 28.98,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.05450604483485222,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 40.1368,
|
|
"eval_samples_per_second": 2.666,
|
|
"eval_steps_per_second": 0.349,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 29.96,
|
|
"step": 397,
|
|
"train_accuracy": 0.9976415094339622,
|
|
"train_f1": 0.9975194273245798,
|
|
"train_loss": 0.05407993122935295,
|
|
"train_precision": 0.9977011494252874,
|
|
"train_recall": 0.9973684210526315,
|
|
"train_runtime": 137.913,
|
|
"train_samples_per_second": 3.074,
|
|
"train_steps_per_second": 0.384
|
|
},
|
|
{
|
|
"epoch": 29.96,
|
|
"grad_norm": 2.247615098953247,
|
|
"learning_rate": 4.2542787286063565e-05,
|
|
"loss": 0.2714,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 29.96,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.04375358670949936,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 40.7987,
|
|
"eval_samples_per_second": 2.623,
|
|
"eval_steps_per_second": 0.343,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 30.94,
|
|
"step": 410,
|
|
"train_accuracy": 0.9834905660377359,
|
|
"train_f1": 0.9834196702554923,
|
|
"train_loss": 0.06718786805868149,
|
|
"train_precision": 0.9847436600428245,
|
|
"train_recall": 0.9826715101769175,
|
|
"train_runtime": 130.6854,
|
|
"train_samples_per_second": 3.244,
|
|
"train_steps_per_second": 0.406
|
|
},
|
|
{
|
|
"epoch": 30.94,
|
|
"grad_norm": 4.1858415603637695,
|
|
"learning_rate": 3.300733496332518e-05,
|
|
"loss": 0.2362,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 30.94,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.05211889371275902,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.7771,
|
|
"eval_samples_per_second": 2.69,
|
|
"eval_steps_per_second": 0.352,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"step": 424,
|
|
"train_accuracy": 0.9834905660377359,
|
|
"train_f1": 0.9831986473322998,
|
|
"train_loss": 0.09282960742712021,
|
|
"train_precision": 0.9842538190364276,
|
|
"train_recall": 0.9827365778544793,
|
|
"train_runtime": 128.6901,
|
|
"train_samples_per_second": 3.295,
|
|
"train_steps_per_second": 0.412
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"grad_norm": 3.333651542663574,
|
|
"learning_rate": 2.273838630806846e-05,
|
|
"loss": 0.2693,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.03920552134513855,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.2034,
|
|
"eval_samples_per_second": 2.729,
|
|
"eval_steps_per_second": 0.357,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 32.98,
|
|
"step": 437,
|
|
"train_accuracy": 0.9834905660377359,
|
|
"train_f1": 0.9830305559415786,
|
|
"train_loss": 0.06464195251464844,
|
|
"train_precision": 0.9849462365591398,
|
|
"train_recall": 0.9821793023126837,
|
|
"train_runtime": 132.3464,
|
|
"train_samples_per_second": 3.204,
|
|
"train_steps_per_second": 0.4
|
|
},
|
|
{
|
|
"epoch": 32.98,
|
|
"grad_norm": 4.870348930358887,
|
|
"learning_rate": 1.3202933985330072e-05,
|
|
"loss": 0.2644,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 32.98,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.06383071094751358,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.8229,
|
|
"eval_samples_per_second": 2.687,
|
|
"eval_steps_per_second": 0.352,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 33.96,
|
|
"step": 450,
|
|
"train_accuracy": 0.9834905660377359,
|
|
"train_f1": 0.983306080394328,
|
|
"train_loss": 0.09585532546043396,
|
|
"train_precision": 0.9838161838161839,
|
|
"train_recall": 0.983380012201209,
|
|
"train_runtime": 133.1315,
|
|
"train_samples_per_second": 3.185,
|
|
"train_steps_per_second": 0.398
|
|
},
|
|
{
|
|
"epoch": 33.96,
|
|
"grad_norm": 3.289733409881592,
|
|
"learning_rate": 3.667481662591687e-06,
|
|
"loss": 0.2516,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 33.96,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.04781457036733627,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 40.0245,
|
|
"eval_samples_per_second": 2.673,
|
|
"eval_steps_per_second": 0.35,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 34.34,
|
|
"step": 455,
|
|
"train_accuracy": 0.9740566037735849,
|
|
"train_f1": 0.9731839886499556,
|
|
"train_loss": 0.0982045829296112,
|
|
"train_precision": 0.9733410852713178,
|
|
"train_recall": 0.9733575444357457,
|
|
"train_runtime": 130.0436,
|
|
"train_samples_per_second": 3.26,
|
|
"train_steps_per_second": 0.408
|
|
},
|
|
{
|
|
"epoch": 34.34,
|
|
"grad_norm": 2.205134630203247,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.2652,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 34.34,
|
|
"eval_accuracy": 0.9906542056074766,
|
|
"eval_f1": 0.9922719141323793,
|
|
"eval_loss": 0.0579226091504097,
|
|
"eval_precision": 0.990909090909091,
|
|
"eval_recall": 0.9939393939393939,
|
|
"eval_runtime": 39.7063,
|
|
"eval_samples_per_second": 2.695,
|
|
"eval_steps_per_second": 0.353,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 34.34,
|
|
"step": 455,
|
|
"total_flos": 3.0926830773436416e+17,
|
|
"train_loss": 0.571298942723117,
|
|
"train_runtime": 12111.8963,
|
|
"train_samples_per_second": 1.225,
|
|
"train_steps_per_second": 0.038
|
|
},
|
|
{
|
|
"epoch": 34.34,
|
|
"step": 455,
|
|
"total_flos": 3.0926830773436416e+17,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 0.0155,
|
|
"train_samples_per_second": 955313.811,
|
|
"train_steps_per_second": 29290.282
|
|
}
|
|
],
|
|
"logging_steps": 50,
|
|
"max_steps": 455,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 35,
|
|
"save_steps": 500,
|
|
"total_flos": 3.0926830773436416e+17,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|